Compare commits
85 Commits
__refs_pul
...
__refs_pul
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a652e58c54 | ||
|
|
6f1720a5b7 | ||
|
|
865025f612 | ||
|
|
1975d32f2d | ||
|
|
4cf64f8e09 | ||
|
|
69265e4504 | ||
|
|
5f877d9458 | ||
|
|
c4d549919f | ||
|
|
dadcf317dc | ||
|
|
8b933e77cd | ||
|
|
3e8e335a5c | ||
|
|
940a71089d | ||
|
|
716fbaef74 | ||
|
|
d4bcd006b2 | ||
|
|
fd12788967 | ||
|
|
53afe47cec | ||
|
|
5235b053b4 | ||
|
|
b6408e9671 | ||
|
|
e43ba3acd4 | ||
|
|
e3c45b4338 | ||
|
|
175fe8aaeb | ||
|
|
6d28d288a3 | ||
|
|
ba165b1092 | ||
|
|
c56d893e77 | ||
|
|
c4d03f0154 | ||
|
|
7cb17834c7 | ||
|
|
f3317cf2db | ||
|
|
daca045fcd | ||
|
|
f69d3a6351 | ||
|
|
5907619a04 | ||
|
|
9567b3a293 | ||
|
|
c6f3831320 | ||
|
|
8abf0add04 | ||
|
|
5b6571c170 | ||
|
|
c27b81cb85 | ||
|
|
0e9a17b029 | ||
|
|
f3c18d622e | ||
|
|
8bdef4f951 | ||
|
|
a97120efc1 | ||
|
|
70c6506a7e | ||
|
|
6ca7241bd9 | ||
|
|
495a8d8d95 | ||
|
|
0964444529 | ||
|
|
c63e68c480 | ||
|
|
4aefd45193 | ||
|
|
6743982d28 | ||
|
|
57db3f6763 | ||
|
|
a206418846 | ||
|
|
10c4f23953 | ||
|
|
9e15193ef8 | ||
|
|
5d0dca73c6 | ||
|
|
08b270676b | ||
|
|
a6f7a44aab | ||
|
|
1230a0e7ce | ||
|
|
1f72bb733f | ||
|
|
e64c41efe8 | ||
|
|
4aa081b4e7 | ||
|
|
95261639fb | ||
|
|
75a8b304d4 | ||
|
|
fb420358a9 | ||
|
|
79e54abe19 | ||
|
|
91e239d66f | ||
|
|
2be32eb3d2 | ||
|
|
c52233ec8b | ||
|
|
9a3737120d | ||
|
|
565fce71b1 | ||
|
|
1a3ff252a4 | ||
|
|
3091b40691 | ||
|
|
9db2c734c9 | ||
|
|
3fe542cf60 | ||
|
|
b3118ee316 | ||
|
|
f1e5314f1a | ||
|
|
e140e2ebc6 | ||
|
|
021d28c9b8 | ||
|
|
701ce1c9d0 | ||
|
|
e4ff140b99 | ||
|
|
a91d3fc639 | ||
|
|
4c36b78567 | ||
|
|
623b2e4b8f | ||
|
|
a3eb91ed8c | ||
|
|
fbe8d1ceaa | ||
|
|
b6a87b422e | ||
|
|
1f3cc036da | ||
|
|
b67be7154d | ||
|
|
a9f58593d4 |
@@ -24,7 +24,7 @@ matrix:
|
||||
- os: osx
|
||||
env: NAME="macos build"
|
||||
sudo: false
|
||||
osx_image: xcode10.1
|
||||
osx_image: xcode10.2
|
||||
install: "./.travis/macos/deps.sh"
|
||||
script: "./.travis/macos/build.sh"
|
||||
after_success: "./.travis/macos/upload.sh"
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
#!/bin/bash -ex
|
||||
mkdir "$HOME/.ccache" || true
|
||||
docker run --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache ubuntu:18.04 /bin/bash -ex /yuzu/.travis/linux-mingw/docker.sh
|
||||
docker run --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache yuzuemu/build-environments:linux-mingw /bin/bash -ex /yuzu/.travis/linux-mingw/docker.sh
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
#!/bin/sh -ex
|
||||
|
||||
docker pull ubuntu:18.04
|
||||
docker pull yuzuemu/build-environments:linux-mingw
|
||||
|
||||
@@ -1,16 +1,6 @@
|
||||
#!/bin/bash -ex
|
||||
|
||||
cd /yuzu
|
||||
MINGW_PACKAGES="sdl2-mingw-w64 qt5base-mingw-w64 qt5tools-mingw-w64 libsamplerate-mingw-w64 qt5multimedia-mingw-w64"
|
||||
apt-get update
|
||||
apt-get install -y gpg wget git python3-pip python ccache g++-mingw-w64-x86-64 gcc-mingw-w64-x86-64 mingw-w64-tools cmake
|
||||
echo 'deb http://ppa.launchpad.net/tobydox/mingw-w64/ubuntu bionic main ' > /etc/apt/sources.list.d/extras.list
|
||||
apt-key adv --keyserver keyserver.ubuntu.com --recv '72931B477E22FEFD47F8DECE02FE5F12ADDE29B2'
|
||||
apt-get update
|
||||
apt-get install -y ${MINGW_PACKAGES}
|
||||
|
||||
# fix a problem in current MinGW headers
|
||||
wget -q https://raw.githubusercontent.com/Alexpux/mingw-w64/d0d7f784833bbb0b2d279310ddc6afb52fe47a46/mingw-w64-headers/crt/errno.h -O /usr/x86_64-w64-mingw32/include/errno.h
|
||||
# override Travis CI unreasonable ccache size
|
||||
echo 'max_size = 3.0G' > "$HOME/.ccache/ccache.conf"
|
||||
|
||||
@@ -23,8 +13,8 @@ echo '' >> /bin/cmd
|
||||
chmod +x /bin/cmd
|
||||
|
||||
mkdir build && cd build
|
||||
cmake .. -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release
|
||||
make -j4
|
||||
cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release
|
||||
ninja
|
||||
|
||||
# Clean up the dirty hacks
|
||||
rm /bin/uname && mv /bin/uname1 /bin/uname
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash -ex
|
||||
|
||||
mkdir -p "$HOME/.ccache"
|
||||
docker run -e ENABLE_COMPATIBILITY_REPORTING --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache ubuntu:18.04 /bin/bash /yuzu/.travis/linux/docker.sh
|
||||
docker run -e ENABLE_COMPATIBILITY_REPORTING --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache yuzuemu/build-environments:linux-fresh /bin/bash /yuzu/.travis/linux/docker.sh
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
#!/bin/sh -ex
|
||||
|
||||
docker pull ubuntu:18.04
|
||||
docker pull yuzuemu/build-environments:linux-fresh
|
||||
|
||||
@@ -1,12 +1,9 @@
|
||||
#!/bin/bash -ex
|
||||
|
||||
apt-get update
|
||||
apt-get install --no-install-recommends -y build-essential git libqt5opengl5-dev libsdl2-dev libssl-dev python qtbase5-dev qtwebengine5-dev wget cmake ninja-build ccache
|
||||
|
||||
cd /yuzu
|
||||
|
||||
mkdir build && cd build
|
||||
cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON -G Ninja
|
||||
cmake .. -G Ninja -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON
|
||||
ninja
|
||||
|
||||
ccache -s
|
||||
|
||||
@@ -7,6 +7,7 @@ export Qt5_DIR=$(brew --prefix)/opt/qt5
|
||||
export UNICORNDIR=$(pwd)/externals/unicorn
|
||||
export PATH="/usr/local/opt/ccache/libexec:$PATH"
|
||||
|
||||
# TODO: Build using ninja instead of make
|
||||
mkdir build && cd build
|
||||
cmake --version
|
||||
cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DUSE_DISCORD_PRESENCE=ON
|
||||
|
||||
@@ -19,7 +19,7 @@ set(BUILD_VERSION "0")
|
||||
if (BUILD_REPOSITORY)
|
||||
# regex capture the string nightly or canary into CMAKE_MATCH_1
|
||||
string(REGEX MATCH "yuzu-emu/yuzu-?(.*)" OUTVAR ${BUILD_REPOSITORY})
|
||||
if (${CMAKE_MATCH_COUNT} GREATER 0)
|
||||
if ("${CMAKE_MATCH_COUNT}" GREATER 0)
|
||||
# capitalize the first letter of each word in the repo name.
|
||||
string(REPLACE "-" ";" REPO_NAME_LIST ${CMAKE_MATCH_1})
|
||||
foreach(WORD ${REPO_NAME_LIST})
|
||||
|
||||
8
externals/CMakeLists.txt
vendored
8
externals/CMakeLists.txt
vendored
@@ -7,6 +7,10 @@ include(DownloadExternals)
|
||||
add_library(catch-single-include INTERFACE)
|
||||
target_include_directories(catch-single-include INTERFACE catch/single_include)
|
||||
|
||||
# libfmt
|
||||
add_subdirectory(fmt)
|
||||
add_library(fmt::fmt ALIAS fmt)
|
||||
|
||||
# Dynarmic
|
||||
if (ARCHITECTURE_x86_64)
|
||||
set(DYNARMIC_TESTS OFF)
|
||||
@@ -14,10 +18,6 @@ if (ARCHITECTURE_x86_64)
|
||||
add_subdirectory(dynarmic)
|
||||
endif()
|
||||
|
||||
# libfmt
|
||||
add_subdirectory(fmt)
|
||||
add_library(fmt::fmt ALIAS fmt)
|
||||
|
||||
# getopt
|
||||
if (MSVC)
|
||||
add_subdirectory(getopt)
|
||||
|
||||
2
externals/dynarmic
vendored
2
externals/dynarmic
vendored
Submodule externals/dynarmic updated: 4e6848d1c9...2683a9a3e3
@@ -21,15 +21,29 @@ if (MSVC)
|
||||
# Ensure that projects build with Unicode support.
|
||||
add_definitions(-DUNICODE -D_UNICODE)
|
||||
|
||||
# /W3 - Level 3 warnings
|
||||
# /MP - Multi-threaded compilation
|
||||
# /Zi - Output debugging information
|
||||
# /Zo - enhanced debug info for optimized builds
|
||||
# /permissive- - enables stricter C++ standards conformance checks
|
||||
# /EHsc - C++-only exception handling semantics
|
||||
# /Zc:throwingNew - let codegen assume `operator new` will never return null
|
||||
# /Zc:inline - let codegen omit inline functions in object files
|
||||
add_compile_options(/W3 /MP /Zi /Zo /permissive- /EHsc /std:c++latest /Zc:throwingNew,inline)
|
||||
# /W3 - Level 3 warnings
|
||||
# /MP - Multi-threaded compilation
|
||||
# /Zi - Output debugging information
|
||||
# /Zo - Enhanced debug info for optimized builds
|
||||
# /permissive- - Enables stricter C++ standards conformance checks
|
||||
# /EHsc - C++-only exception handling semantics
|
||||
# /volatile:iso - Use strict standards-compliant volatile semantics.
|
||||
# /Zc:externConstexpr - Allow extern constexpr variables to have external linkage, like the standard mandates
|
||||
# /Zc:inline - Let codegen omit inline functions in object files
|
||||
# /Zc:throwingNew - Let codegen assume `operator new` (without std::nothrow) will never return null
|
||||
add_compile_options(
|
||||
/W3
|
||||
/MP
|
||||
/Zi
|
||||
/Zo
|
||||
/permissive-
|
||||
/EHsc
|
||||
/std:c++latest
|
||||
/volatile:iso
|
||||
/Zc:externConstexpr
|
||||
/Zc:inline
|
||||
/Zc:throwingNew
|
||||
)
|
||||
|
||||
# /GS- - No stack buffer overflow checks
|
||||
add_compile_options("$<$<CONFIG:Release>:/GS->")
|
||||
@@ -37,7 +51,10 @@ if (MSVC)
|
||||
set(CMAKE_EXE_LINKER_FLAGS_DEBUG "/DEBUG /MANIFEST:NO" CACHE STRING "" FORCE)
|
||||
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE)
|
||||
else()
|
||||
add_compile_options("-Wno-attributes")
|
||||
add_compile_options(
|
||||
-Wall
|
||||
-Wno-attributes
|
||||
)
|
||||
|
||||
if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang)
|
||||
add_compile_options("-stdlib=libc++")
|
||||
|
||||
@@ -2,8 +2,6 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <zstd.h>
|
||||
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
|
||||
namespace Core::Frontend {
|
||||
|
||||
GraphicsContext::~GraphicsContext() = default;
|
||||
|
||||
class EmuWindow::TouchState : public Input::Factory<Input::TouchDevice>,
|
||||
public std::enable_shared_from_this<TouchState> {
|
||||
public:
|
||||
|
||||
@@ -19,6 +19,8 @@ namespace Core::Frontend {
|
||||
*/
|
||||
class GraphicsContext {
|
||||
public:
|
||||
virtual ~GraphicsContext();
|
||||
|
||||
/// Makes the graphics context current for the caller thread
|
||||
virtual void MakeCurrent() = 0;
|
||||
|
||||
|
||||
@@ -241,7 +241,8 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
|
||||
}
|
||||
|
||||
Process::Process(Core::System& system)
|
||||
: WaitObject{system.Kernel()}, address_arbiter{system}, mutex{system}, system{system} {}
|
||||
: WaitObject{system.Kernel()}, vm_manager{system},
|
||||
address_arbiter{system}, mutex{system}, system{system} {}
|
||||
|
||||
Process::~Process() = default;
|
||||
|
||||
|
||||
@@ -62,7 +62,7 @@ bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
VMManager::VMManager() {
|
||||
VMManager::VMManager(Core::System& system) : system{system} {
|
||||
// Default to assuming a 39-bit address space. This way we have a sane
|
||||
// starting point with executables that don't provide metadata.
|
||||
Reset(FileSys::ProgramAddressSpaceType::Is39Bit);
|
||||
@@ -111,7 +111,6 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
|
||||
VirtualMemoryArea& final_vma = vma_handle->second;
|
||||
ASSERT(final_vma.size == size);
|
||||
|
||||
auto& system = Core::System::GetInstance();
|
||||
system.ArmInterface(0).MapBackingMemory(target, size, block->data() + offset,
|
||||
VMAPermission::ReadWriteExecute);
|
||||
system.ArmInterface(1).MapBackingMemory(target, size, block->data() + offset,
|
||||
@@ -140,7 +139,6 @@ ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* me
|
||||
VirtualMemoryArea& final_vma = vma_handle->second;
|
||||
ASSERT(final_vma.size == size);
|
||||
|
||||
auto& system = Core::System::GetInstance();
|
||||
system.ArmInterface(0).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
|
||||
system.ArmInterface(1).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
|
||||
system.ArmInterface(2).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
|
||||
@@ -223,7 +221,6 @@ ResultCode VMManager::UnmapRange(VAddr target, u64 size) {
|
||||
|
||||
ASSERT(FindVMA(target)->second.size >= size);
|
||||
|
||||
auto& system = Core::System::GetInstance();
|
||||
system.ArmInterface(0).UnmapMemory(target, size);
|
||||
system.ArmInterface(1).UnmapMemory(target, size);
|
||||
system.ArmInterface(2).UnmapMemory(target, size);
|
||||
@@ -376,7 +373,7 @@ ResultCode VMManager::UnmapCodeMemory(VAddr dst_address, VAddr src_address, u64
|
||||
Reprotect(src_vma_iter, VMAPermission::ReadWrite);
|
||||
|
||||
if (dst_memory_state == MemoryState::ModuleCode) {
|
||||
Core::System::GetInstance().InvalidateCpuInstructionCaches();
|
||||
system.InvalidateCpuInstructionCaches();
|
||||
}
|
||||
|
||||
return unmap_result;
|
||||
|
||||
@@ -14,6 +14,10 @@
|
||||
#include "core/hle/result.h"
|
||||
#include "core/memory.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace FileSys {
|
||||
enum class ProgramAddressSpaceType : u8;
|
||||
}
|
||||
@@ -321,7 +325,7 @@ class VMManager final {
|
||||
public:
|
||||
using VMAHandle = VMAMap::const_iterator;
|
||||
|
||||
VMManager();
|
||||
explicit VMManager(Core::System& system);
|
||||
~VMManager();
|
||||
|
||||
/// Clears the address space map, re-initializing with a single free area.
|
||||
@@ -712,5 +716,7 @@ private:
|
||||
// The end of the currently allocated heap. This is not an inclusive
|
||||
// end of the range. This is essentially 'base_address + current_size'.
|
||||
VAddr heap_end = 0;
|
||||
|
||||
Core::System& system;
|
||||
};
|
||||
} // namespace Kernel
|
||||
|
||||
@@ -50,7 +50,7 @@ void AudCtl::GetTargetVolumeMin(Kernel::HLERequestContext& ctx) {
|
||||
LOG_DEBUG(Audio, "called.");
|
||||
|
||||
// This service function is currently hardcoded on the
|
||||
// actual console to this value (as of 6.0.0).
|
||||
// actual console to this value (as of 8.0.0).
|
||||
constexpr s32 target_min_volume = 0;
|
||||
|
||||
IPC::ResponseBuilder rb{ctx, 3};
|
||||
@@ -62,7 +62,7 @@ void AudCtl::GetTargetVolumeMax(Kernel::HLERequestContext& ctx) {
|
||||
LOG_DEBUG(Audio, "called.");
|
||||
|
||||
// This service function is currently hardcoded on the
|
||||
// actual console to this value (as of 6.0.0).
|
||||
// actual console to this value (as of 8.0.0).
|
||||
constexpr s32 target_max_volume = 15;
|
||||
|
||||
IPC::ResponseBuilder rb{ctx, 3};
|
||||
|
||||
@@ -21,8 +21,6 @@
|
||||
#include "core/memory.h"
|
||||
#include "core/settings.h"
|
||||
|
||||
#pragma optimize("", off)
|
||||
|
||||
namespace Loader {
|
||||
namespace {
|
||||
struct MODHeader {
|
||||
|
||||
@@ -72,15 +72,6 @@ u8* GetPointer(VAddr vaddr);
|
||||
|
||||
std::string ReadCString(VAddr vaddr, std::size_t max_length);
|
||||
|
||||
enum class FlushMode {
|
||||
/// Write back modified surfaces to RAM
|
||||
Flush,
|
||||
/// Remove region from the cache
|
||||
Invalidate,
|
||||
/// Write back modified surfaces to RAM, and also remove them from the cache
|
||||
FlushAndInvalidate,
|
||||
};
|
||||
|
||||
/**
|
||||
* Mark each page touching the region as cached.
|
||||
*/
|
||||
|
||||
@@ -102,12 +102,6 @@ bool VerifyLogin(const std::string& username, const std::string& token) {
|
||||
}
|
||||
|
||||
TelemetrySession::TelemetrySession() {
|
||||
#ifdef ENABLE_WEB_SERVICE
|
||||
backend = std::make_unique<WebService::TelemetryJson>(
|
||||
Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
|
||||
#else
|
||||
backend = std::make_unique<Telemetry::NullVisitor>();
|
||||
#endif
|
||||
// Log one-time top-level information
|
||||
AddField(Telemetry::FieldType::None, "TelemetryId", GetTelemetryId());
|
||||
|
||||
@@ -175,9 +169,14 @@ TelemetrySession::~TelemetrySession() {
|
||||
.count()};
|
||||
AddField(Telemetry::FieldType::Session, "Shutdown_Time", shutdown_time);
|
||||
|
||||
#ifdef ENABLE_WEB_SERVICE
|
||||
auto backend = std::make_unique<WebService::TelemetryJson>(
|
||||
Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
|
||||
#else
|
||||
auto backend = std::make_unique<Telemetry::NullVisitor>();
|
||||
#endif
|
||||
|
||||
// Complete the session, submitting to web service if necessary
|
||||
// This is just a placeholder to wrap up the session once the core completes and this is
|
||||
// destroyed. This will be moved elsewhere once we are actually doing real I/O with the service.
|
||||
field_collection.Accept(*backend);
|
||||
if (Settings::values.enable_telemetry)
|
||||
backend->Complete();
|
||||
@@ -186,6 +185,8 @@ TelemetrySession::~TelemetrySession() {
|
||||
|
||||
bool TelemetrySession::SubmitTestcase() {
|
||||
#ifdef ENABLE_WEB_SERVICE
|
||||
auto backend = std::make_unique<WebService::TelemetryJson>(
|
||||
Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
|
||||
field_collection.Accept(*backend);
|
||||
return backend->SubmitTestcase();
|
||||
#else
|
||||
|
||||
@@ -39,7 +39,6 @@ public:
|
||||
|
||||
private:
|
||||
Telemetry::FieldCollection field_collection; ///< Tracks all added fields for the session
|
||||
std::unique_ptr<Telemetry::VisitorInterface> backend; ///< Backend interface that logs fields
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -3,6 +3,8 @@ add_library(video_core STATIC
|
||||
dma_pusher.h
|
||||
debug_utils/debug_utils.cpp
|
||||
debug_utils/debug_utils.h
|
||||
engines/engine_upload.cpp
|
||||
engines/engine_upload.h
|
||||
engines/fermi_2d.cpp
|
||||
engines/fermi_2d.h
|
||||
engines/kepler_compute.cpp
|
||||
|
||||
@@ -105,6 +105,8 @@ bool DmaPusher::Step() {
|
||||
dma_state.non_incrementing = false;
|
||||
dma_increment_once = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
48
src/video_core/engines/engine_upload.cpp
Normal file
48
src/video_core/engines/engine_upload.cpp
Normal file
@@ -0,0 +1,48 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "video_core/engines/engine_upload.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace Tegra::Engines::Upload {
|
||||
|
||||
State::State(MemoryManager& memory_manager, Registers& regs)
|
||||
: memory_manager(memory_manager), regs(regs) {}
|
||||
|
||||
void State::ProcessExec(const bool is_linear) {
|
||||
write_offset = 0;
|
||||
copy_size = regs.line_length_in * regs.line_count;
|
||||
inner_buffer.resize(copy_size);
|
||||
this->is_linear = is_linear;
|
||||
}
|
||||
|
||||
void State::ProcessData(const u32 data, const bool is_last_call) {
|
||||
const u32 sub_copy_size = std::min(4U, copy_size - write_offset);
|
||||
std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size);
|
||||
write_offset += sub_copy_size;
|
||||
if (!is_last_call) {
|
||||
return;
|
||||
}
|
||||
const GPUVAddr address{regs.dest.Address()};
|
||||
if (is_linear) {
|
||||
memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
|
||||
} else {
|
||||
UNIMPLEMENTED_IF(regs.dest.z != 0);
|
||||
UNIMPLEMENTED_IF(regs.dest.depth != 1);
|
||||
UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
|
||||
UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
|
||||
const std::size_t dst_size = Tegra::Texture::CalculateSize(
|
||||
true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
|
||||
tmp_buffer.resize(dst_size);
|
||||
memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
|
||||
Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y,
|
||||
regs.dest.BlockHeight(), copy_size, inner_buffer.data(),
|
||||
tmp_buffer.data());
|
||||
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines::Upload
|
||||
75
src/video_core/engines/engine_upload.h
Normal file
75
src/video_core/engines/engine_upload.h
Normal file
@@ -0,0 +1,75 @@
|
||||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines::Upload {
|
||||
|
||||
struct Registers {
|
||||
u32 line_length_in;
|
||||
u32 line_count;
|
||||
|
||||
struct {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
u32 pitch;
|
||||
union {
|
||||
BitField<0, 4, u32> block_width;
|
||||
BitField<4, 4, u32> block_height;
|
||||
BitField<8, 4, u32> block_depth;
|
||||
};
|
||||
u32 width;
|
||||
u32 height;
|
||||
u32 depth;
|
||||
u32 z;
|
||||
u32 x;
|
||||
u32 y;
|
||||
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
|
||||
}
|
||||
|
||||
u32 BlockWidth() const {
|
||||
return 1U << block_width.Value();
|
||||
}
|
||||
|
||||
u32 BlockHeight() const {
|
||||
return 1U << block_height.Value();
|
||||
}
|
||||
|
||||
u32 BlockDepth() const {
|
||||
return 1U << block_depth.Value();
|
||||
}
|
||||
} dest;
|
||||
};
|
||||
|
||||
class State {
|
||||
public:
|
||||
State(MemoryManager& memory_manager, Registers& regs);
|
||||
~State() = default;
|
||||
|
||||
void ProcessExec(const bool is_linear);
|
||||
void ProcessData(const u32 data, const bool is_last_call);
|
||||
|
||||
private:
|
||||
u32 write_offset = 0;
|
||||
u32 copy_size = 0;
|
||||
std::vector<u8> inner_buffer;
|
||||
std::vector<u8> tmp_buffer;
|
||||
bool is_linear = false;
|
||||
Registers& regs;
|
||||
MemoryManager& memory_manager;
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines::Upload
|
||||
@@ -21,6 +21,12 @@ class RasterizerInterface;
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
/**
|
||||
* This Engine is known as G80_2D. Documentation can be found in:
|
||||
* https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
|
||||
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
|
||||
*/
|
||||
|
||||
#define FERMI2D_REG_INDEX(field_name) \
|
||||
(offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))
|
||||
|
||||
|
||||
@@ -4,12 +4,21 @@
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {}
|
||||
KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
||||
MemoryManager& memory_manager)
|
||||
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{
|
||||
memory_manager,
|
||||
regs.upload} {}
|
||||
|
||||
KeplerCompute::~KeplerCompute() = default;
|
||||
|
||||
@@ -20,14 +29,34 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
|
||||
regs.reg_array[method_call.method] = method_call.argument;
|
||||
|
||||
switch (method_call.method) {
|
||||
case KEPLER_COMPUTE_REG_INDEX(exec_upload): {
|
||||
upload_state.ProcessExec(regs.exec_upload.linear != 0);
|
||||
break;
|
||||
}
|
||||
case KEPLER_COMPUTE_REG_INDEX(data_upload): {
|
||||
const bool is_last_call = method_call.IsLastCall();
|
||||
upload_state.ProcessData(method_call.argument, is_last_call);
|
||||
if (is_last_call) {
|
||||
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case KEPLER_COMPUTE_REG_INDEX(launch):
|
||||
// Abort execution since compute shaders can be used to alter game memory (e.g. CUDA
|
||||
// kernels)
|
||||
UNREACHABLE_MSG("Compute shaders are not implemented");
|
||||
ProcessLaunch();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void KeplerCompute::ProcessLaunch() {
|
||||
|
||||
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
|
||||
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
|
||||
LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
|
||||
|
||||
const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start;
|
||||
LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc);
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
||||
@@ -6,22 +6,40 @@
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/engine_upload.h"
|
||||
#include "video_core/gpu.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
/**
|
||||
* This Engine is known as GK104_Compute. Documentation can be found in:
|
||||
* https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml
|
||||
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h
|
||||
*/
|
||||
|
||||
#define KEPLER_COMPUTE_REG_INDEX(field_name) \
|
||||
(offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
|
||||
|
||||
class KeplerCompute final {
|
||||
public:
|
||||
explicit KeplerCompute(MemoryManager& memory_manager);
|
||||
explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
||||
MemoryManager& memory_manager);
|
||||
~KeplerCompute();
|
||||
|
||||
static constexpr std::size_t NumConstBuffers = 8;
|
||||
@@ -31,30 +49,181 @@ public:
|
||||
|
||||
union {
|
||||
struct {
|
||||
INSERT_PADDING_WORDS(0xAF);
|
||||
INSERT_PADDING_WORDS(0x60);
|
||||
|
||||
Upload::Registers upload;
|
||||
|
||||
struct {
|
||||
union {
|
||||
BitField<0, 1, u32> linear;
|
||||
};
|
||||
} exec_upload;
|
||||
|
||||
u32 data_upload;
|
||||
|
||||
INSERT_PADDING_WORDS(0x3F);
|
||||
|
||||
struct {
|
||||
u32 address;
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8));
|
||||
}
|
||||
} launch_desc_loc;
|
||||
|
||||
INSERT_PADDING_WORDS(0x1);
|
||||
|
||||
u32 launch;
|
||||
|
||||
INSERT_PADDING_WORDS(0xC48);
|
||||
INSERT_PADDING_WORDS(0x4A7);
|
||||
|
||||
struct {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
u32 limit;
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||
address_low);
|
||||
}
|
||||
} tsc;
|
||||
|
||||
INSERT_PADDING_WORDS(0x3);
|
||||
|
||||
struct {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
u32 limit;
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||
address_low);
|
||||
}
|
||||
} tic;
|
||||
|
||||
INSERT_PADDING_WORDS(0x22);
|
||||
|
||||
struct {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||
address_low);
|
||||
}
|
||||
} code_loc;
|
||||
|
||||
INSERT_PADDING_WORDS(0x3FE);
|
||||
|
||||
u32 texture_const_buffer_index;
|
||||
|
||||
INSERT_PADDING_WORDS(0x374);
|
||||
};
|
||||
std::array<u32, NUM_REGS> reg_array;
|
||||
};
|
||||
} regs{};
|
||||
|
||||
struct LaunchParams {
|
||||
static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40;
|
||||
|
||||
INSERT_PADDING_WORDS(0x8);
|
||||
|
||||
u32 program_start;
|
||||
|
||||
INSERT_PADDING_WORDS(0x2);
|
||||
|
||||
BitField<30, 1, u32> linked_tsc;
|
||||
|
||||
BitField<0, 31, u32> grid_dim_x;
|
||||
union {
|
||||
BitField<0, 16, u32> grid_dim_y;
|
||||
BitField<16, 16, u32> grid_dim_z;
|
||||
};
|
||||
|
||||
INSERT_PADDING_WORDS(0x3);
|
||||
|
||||
BitField<0, 16, u32> shared_alloc;
|
||||
|
||||
BitField<0, 31, u32> block_dim_x;
|
||||
union {
|
||||
BitField<0, 16, u32> block_dim_y;
|
||||
BitField<16, 16, u32> block_dim_z;
|
||||
};
|
||||
|
||||
union {
|
||||
BitField<0, 8, u32> const_buffer_enable_mask;
|
||||
BitField<29, 2, u32> cache_layout;
|
||||
} memory_config;
|
||||
|
||||
INSERT_PADDING_WORDS(0x8);
|
||||
|
||||
struct {
|
||||
u32 address_low;
|
||||
union {
|
||||
BitField<0, 8, u32> address_high;
|
||||
BitField<15, 17, u32> size;
|
||||
};
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) |
|
||||
address_low);
|
||||
}
|
||||
} const_buffer_config[8];
|
||||
|
||||
union {
|
||||
BitField<0, 20, u32> local_pos_alloc;
|
||||
BitField<27, 5, u32> barrier_alloc;
|
||||
};
|
||||
|
||||
union {
|
||||
BitField<0, 20, u32> local_neg_alloc;
|
||||
BitField<24, 5, u32> gpr_alloc;
|
||||
};
|
||||
|
||||
INSERT_PADDING_WORDS(0x11);
|
||||
} launch_description;
|
||||
|
||||
struct {
|
||||
u32 write_offset = 0;
|
||||
u32 copy_size = 0;
|
||||
std::vector<u8> inner_buffer;
|
||||
} state{};
|
||||
|
||||
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
|
||||
"KeplerCompute Regs has wrong size");
|
||||
|
||||
static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32),
|
||||
"KeplerCompute LaunchParams has wrong size");
|
||||
|
||||
/// Write the value to the register identified by method.
|
||||
void CallMethod(const GPU::MethodCall& method_call);
|
||||
|
||||
private:
|
||||
Core::System& system;
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
MemoryManager& memory_manager;
|
||||
Upload::State upload_state;
|
||||
|
||||
void ProcessLaunch();
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position) \
|
||||
static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(upload, 0x60);
|
||||
ASSERT_REG_POSITION(exec_upload, 0x6C);
|
||||
ASSERT_REG_POSITION(data_upload, 0x6D);
|
||||
ASSERT_REG_POSITION(launch, 0xAF);
|
||||
ASSERT_REG_POSITION(tsc, 0x557);
|
||||
ASSERT_REG_POSITION(tic, 0x55D);
|
||||
ASSERT_REG_POSITION(code_loc, 0x582);
|
||||
ASSERT_REG_POSITION(texture_const_buffer_index, 0x982);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14);
|
||||
ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D);
|
||||
|
||||
#undef ASSERT_REG_POSITION
|
||||
|
||||
|
||||
@@ -14,9 +14,8 @@
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
||||
MemoryManager& memory_manager)
|
||||
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
|
||||
KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
|
||||
: system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {}
|
||||
|
||||
KeplerMemory::~KeplerMemory() = default;
|
||||
|
||||
@@ -28,46 +27,18 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
|
||||
|
||||
switch (method_call.method) {
|
||||
case KEPLERMEMORY_REG_INDEX(exec): {
|
||||
ProcessExec();
|
||||
upload_state.ProcessExec(regs.exec.linear != 0);
|
||||
break;
|
||||
}
|
||||
case KEPLERMEMORY_REG_INDEX(data): {
|
||||
ProcessData(method_call.argument, method_call.IsLastCall());
|
||||
const bool is_last_call = method_call.IsLastCall();
|
||||
upload_state.ProcessData(method_call.argument, is_last_call);
|
||||
if (is_last_call) {
|
||||
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void KeplerMemory::ProcessExec() {
|
||||
state.write_offset = 0;
|
||||
state.copy_size = regs.line_length_in * regs.line_count;
|
||||
state.inner_buffer.resize(state.copy_size);
|
||||
}
|
||||
|
||||
void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
|
||||
const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
|
||||
std::memcpy(&state.inner_buffer[state.write_offset], ®s.data, sub_copy_size);
|
||||
state.write_offset += sub_copy_size;
|
||||
if (is_last_call) {
|
||||
const GPUVAddr address{regs.dest.Address()};
|
||||
if (regs.exec.linear != 0) {
|
||||
memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size);
|
||||
} else {
|
||||
UNIMPLEMENTED_IF(regs.dest.z != 0);
|
||||
UNIMPLEMENTED_IF(regs.dest.depth != 1);
|
||||
UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
|
||||
UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
|
||||
const std::size_t dst_size = Tegra::Texture::CalculateSize(
|
||||
true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
|
||||
std::vector<u8> tmp_buffer(dst_size);
|
||||
memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
|
||||
Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
|
||||
regs.dest.y, regs.dest.BlockHeight(), state.copy_size,
|
||||
state.inner_buffer.data(), tmp_buffer.data());
|
||||
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
|
||||
}
|
||||
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/engine_upload.h"
|
||||
#include "video_core/gpu.h"
|
||||
|
||||
namespace Core {
|
||||
@@ -20,19 +21,20 @@ namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
/**
|
||||
* This Engine is known as P2MF. Documentation can be found in:
|
||||
* https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml
|
||||
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h
|
||||
*/
|
||||
|
||||
#define KEPLERMEMORY_REG_INDEX(field_name) \
|
||||
(offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))
|
||||
|
||||
class KeplerMemory final {
|
||||
public:
|
||||
KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
||||
MemoryManager& memory_manager);
|
||||
KeplerMemory(Core::System& system, MemoryManager& memory_manager);
|
||||
~KeplerMemory();
|
||||
|
||||
/// Write the value to the register identified by method.
|
||||
@@ -45,42 +47,7 @@ public:
|
||||
struct {
|
||||
INSERT_PADDING_WORDS(0x60);
|
||||
|
||||
u32 line_length_in;
|
||||
u32 line_count;
|
||||
|
||||
struct {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
u32 pitch;
|
||||
union {
|
||||
BitField<0, 4, u32> block_width;
|
||||
BitField<4, 4, u32> block_height;
|
||||
BitField<8, 4, u32> block_depth;
|
||||
};
|
||||
u32 width;
|
||||
u32 height;
|
||||
u32 depth;
|
||||
u32 z;
|
||||
u32 x;
|
||||
u32 y;
|
||||
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||
address_low);
|
||||
}
|
||||
|
||||
u32 BlockWidth() const {
|
||||
return 1U << block_width.Value();
|
||||
}
|
||||
|
||||
u32 BlockHeight() const {
|
||||
return 1U << block_height.Value();
|
||||
}
|
||||
|
||||
u32 BlockDepth() const {
|
||||
return 1U << block_depth.Value();
|
||||
}
|
||||
} dest;
|
||||
Upload::Registers upload;
|
||||
|
||||
struct {
|
||||
union {
|
||||
@@ -96,28 +63,17 @@ public:
|
||||
};
|
||||
} regs{};
|
||||
|
||||
struct {
|
||||
u32 write_offset = 0;
|
||||
u32 copy_size = 0;
|
||||
std::vector<u8> inner_buffer;
|
||||
} state{};
|
||||
|
||||
private:
|
||||
Core::System& system;
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
MemoryManager& memory_manager;
|
||||
|
||||
void ProcessExec();
|
||||
void ProcessData(u32 data, bool is_last_call);
|
||||
Upload::State upload_state;
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(line_length_in, 0x60);
|
||||
ASSERT_REG_POSITION(line_count, 0x61);
|
||||
ASSERT_REG_POSITION(dest, 0x62);
|
||||
ASSERT_REG_POSITION(upload, 0x60);
|
||||
ASSERT_REG_POSITION(exec, 0x6C);
|
||||
ASSERT_REG_POSITION(data, 0x6D);
|
||||
#undef ASSERT_REG_POSITION
|
||||
|
||||
@@ -20,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00;
|
||||
|
||||
Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
||||
MemoryManager& memory_manager)
|
||||
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{
|
||||
*this} {
|
||||
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
|
||||
macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
|
||||
InitializeRegisterDefaults();
|
||||
}
|
||||
|
||||
@@ -253,6 +253,18 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
|
||||
ProcessSyncPoint();
|
||||
break;
|
||||
}
|
||||
case MAXWELL3D_REG_INDEX(exec_upload): {
|
||||
upload_state.ProcessExec(regs.exec_upload.linear != 0);
|
||||
break;
|
||||
}
|
||||
case MAXWELL3D_REG_INDEX(data_upload): {
|
||||
const bool is_last_call = method_call.IsLastCall();
|
||||
upload_state.ProcessData(method_call.argument, is_last_call);
|
||||
if (is_last_call) {
|
||||
dirty_flags.OnMemoryWrite();
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/math_util.h"
|
||||
#include "video_core/engines/engine_upload.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/macro_interpreter.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
@@ -32,6 +33,12 @@ class RasterizerInterface;
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
/**
|
||||
* This Engine is known as GF100_3D. Documentation can be found in:
|
||||
* https://github.com/envytools/envytools/blob/master/rnndb/graph/gf100_3d.xml
|
||||
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
|
||||
*/
|
||||
|
||||
#define MAXWELL3D_REG_INDEX(field_name) \
|
||||
(offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
|
||||
|
||||
@@ -243,9 +250,10 @@ public:
|
||||
return "10_10_10_2";
|
||||
case Size::Size_11_11_10:
|
||||
return "11_11_10";
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return {};
|
||||
}
|
||||
UNREACHABLE();
|
||||
return {};
|
||||
}
|
||||
|
||||
std::string TypeString() const {
|
||||
@@ -579,7 +587,18 @@ public:
|
||||
u32 bind;
|
||||
} macros;
|
||||
|
||||
INSERT_PADDING_WORDS(0x69);
|
||||
INSERT_PADDING_WORDS(0x17);
|
||||
|
||||
Upload::Registers upload;
|
||||
struct {
|
||||
union {
|
||||
BitField<0, 1, u32> linear;
|
||||
};
|
||||
} exec_upload;
|
||||
|
||||
u32 data_upload;
|
||||
|
||||
INSERT_PADDING_WORDS(0x44);
|
||||
|
||||
struct {
|
||||
union {
|
||||
@@ -1175,6 +1194,8 @@ private:
|
||||
/// Interpreter for the macro codes uploaded to the GPU.
|
||||
MacroInterpreter macro_interpreter;
|
||||
|
||||
Upload::State upload_state;
|
||||
|
||||
/// Retrieves information about a specific TIC entry from the TIC buffer.
|
||||
Texture::TICEntry GetTICEntry(u32 tic_index) const;
|
||||
|
||||
@@ -1218,6 +1239,9 @@ private:
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(macros, 0x45);
|
||||
ASSERT_REG_POSITION(upload, 0x60);
|
||||
ASSERT_REG_POSITION(exec_upload, 0x6C);
|
||||
ASSERT_REG_POSITION(data_upload, 0x6D);
|
||||
ASSERT_REG_POSITION(sync_info, 0xB2);
|
||||
ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
|
||||
ASSERT_REG_POSITION(rt, 0x200);
|
||||
|
||||
@@ -83,57 +83,66 @@ void MaxwellDMA::HandleCopy() {
|
||||
|
||||
ASSERT(regs.exec.enable_2d == 1);
|
||||
|
||||
const std::size_t copy_size = regs.x_count * regs.y_count;
|
||||
|
||||
auto source_ptr{memory_manager.GetPointer(source)};
|
||||
auto dst_ptr{memory_manager.GetPointer(dest)};
|
||||
|
||||
if (!source_ptr) {
|
||||
LOG_ERROR(HW_GPU, "source_ptr is invalid");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!dst_ptr) {
|
||||
LOG_ERROR(HW_GPU, "dst_ptr is invalid");
|
||||
return;
|
||||
}
|
||||
|
||||
const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
|
||||
// TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
|
||||
// copying.
|
||||
rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
|
||||
|
||||
// We have to invalidate the destination region to evict any outdated surfaces from the
|
||||
// cache. We do this before actually writing the new data because the destination address
|
||||
// might contain a dirty surface that will have to be written back to memory.
|
||||
rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
|
||||
};
|
||||
|
||||
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
|
||||
ASSERT(regs.src_params.size_z == 1);
|
||||
// If the input is tiled and the output is linear, deswizzle the input and copy it over.
|
||||
|
||||
const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
|
||||
const std::size_t src_size = Texture::CalculateSize(
|
||||
true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
|
||||
regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
|
||||
|
||||
FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y,
|
||||
copy_size * src_bytes_per_pixel);
|
||||
const std::size_t dst_size = regs.dst_pitch * regs.y_count;
|
||||
|
||||
if (read_buffer.size() < src_size) {
|
||||
read_buffer.resize(src_size);
|
||||
}
|
||||
|
||||
if (write_buffer.size() < dst_size) {
|
||||
write_buffer.resize(dst_size);
|
||||
}
|
||||
|
||||
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
|
||||
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
|
||||
|
||||
Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
|
||||
regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
|
||||
regs.src_params.BlockHeight(), regs.src_params.pos_x,
|
||||
regs.src_params.pos_y);
|
||||
regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(),
|
||||
write_buffer.data(), regs.src_params.BlockHeight(),
|
||||
regs.src_params.pos_x, regs.src_params.pos_y);
|
||||
|
||||
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
|
||||
} else {
|
||||
ASSERT(regs.dst_params.size_z == 1);
|
||||
ASSERT(regs.src_pitch == regs.x_count);
|
||||
ASSERT(regs.dst_params.BlockDepth() == 1);
|
||||
|
||||
const u32 src_bpp = regs.src_pitch / regs.x_count;
|
||||
const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;
|
||||
|
||||
FlushAndInvalidate(regs.src_pitch * regs.y_count,
|
||||
regs.dst_params.size_x * regs.dst_params.size_y * src_bpp);
|
||||
const std::size_t dst_size = Texture::CalculateSize(
|
||||
true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
|
||||
regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
|
||||
|
||||
const std::size_t dst_layer_size = Texture::CalculateSize(
|
||||
true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
|
||||
regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
|
||||
|
||||
const std::size_t src_size = regs.src_pitch * regs.y_count;
|
||||
|
||||
if (read_buffer.size() < src_size) {
|
||||
read_buffer.resize(src_size);
|
||||
}
|
||||
|
||||
if (write_buffer.size() < dst_size) {
|
||||
write_buffer.resize(dst_size);
|
||||
}
|
||||
|
||||
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
|
||||
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
|
||||
|
||||
// If the input is linear and the output is tiled, swizzle the input and copy it over.
|
||||
Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
|
||||
src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
|
||||
src_bytes_per_pixel,
|
||||
write_buffer.data() + dst_layer_size * regs.dst_params.pos_z,
|
||||
read_buffer.data(), regs.dst_params.BlockHeight());
|
||||
|
||||
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
@@ -25,6 +26,11 @@ class RasterizerInterface;
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
/**
|
||||
* This Engine is known as GK104_Copy. Documentation can be found in:
|
||||
* https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
|
||||
*/
|
||||
|
||||
class MaxwellDMA final {
|
||||
public:
|
||||
explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
||||
@@ -63,6 +69,16 @@ public:
|
||||
|
||||
static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
|
||||
|
||||
enum class ComponentMode : u32 {
|
||||
Src0 = 0,
|
||||
Src1 = 1,
|
||||
Src2 = 2,
|
||||
Src3 = 3,
|
||||
Const0 = 4,
|
||||
Const1 = 5,
|
||||
Zero = 6,
|
||||
};
|
||||
|
||||
enum class CopyMode : u32 {
|
||||
None = 0,
|
||||
Unk1 = 1,
|
||||
@@ -128,7 +144,26 @@ public:
|
||||
u32 x_count;
|
||||
u32 y_count;
|
||||
|
||||
INSERT_PADDING_WORDS(0xBB);
|
||||
INSERT_PADDING_WORDS(0xB8);
|
||||
|
||||
u32 const0;
|
||||
u32 const1;
|
||||
union {
|
||||
BitField<0, 4, ComponentMode> component0;
|
||||
BitField<4, 4, ComponentMode> component1;
|
||||
BitField<8, 4, ComponentMode> component2;
|
||||
BitField<12, 4, ComponentMode> component3;
|
||||
BitField<16, 2, u32> component_size;
|
||||
BitField<20, 3, u32> src_num_components;
|
||||
BitField<24, 3, u32> dst_num_components;
|
||||
|
||||
u32 SrcBytePerPixel() const {
|
||||
return src_num_components.Value() * component_size.Value();
|
||||
}
|
||||
u32 DstBytePerPixel() const {
|
||||
return dst_num_components.Value() * component_size.Value();
|
||||
}
|
||||
} swizzle_config;
|
||||
|
||||
Parameters dst_params;
|
||||
|
||||
@@ -149,6 +184,9 @@ private:
|
||||
|
||||
MemoryManager& memory_manager;
|
||||
|
||||
std::vector<u8> read_buffer;
|
||||
std::vector<u8> write_buffer;
|
||||
|
||||
/// Performs the copy from the source buffer to the destination buffer as configured in the
|
||||
/// registers.
|
||||
void HandleCopy();
|
||||
@@ -165,6 +203,9 @@ ASSERT_REG_POSITION(src_pitch, 0x104);
|
||||
ASSERT_REG_POSITION(dst_pitch, 0x105);
|
||||
ASSERT_REG_POSITION(x_count, 0x106);
|
||||
ASSERT_REG_POSITION(y_count, 0x107);
|
||||
ASSERT_REG_POSITION(const0, 0x1C0);
|
||||
ASSERT_REG_POSITION(const1, 0x1C1);
|
||||
ASSERT_REG_POSITION(swizzle_config, 0x1C2);
|
||||
ASSERT_REG_POSITION(dst_params, 0x1C3);
|
||||
ASSERT_REG_POSITION(src_params, 0x1CA);
|
||||
|
||||
|
||||
@@ -35,9 +35,9 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren
|
||||
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
|
||||
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
|
||||
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
|
||||
kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
|
||||
kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
|
||||
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
|
||||
kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager);
|
||||
kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
|
||||
}
|
||||
|
||||
GPU::~GPU() = default;
|
||||
|
||||
@@ -44,7 +44,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
|
||||
renderer.Rasterizer().FlushRegion(data->addr, data->size);
|
||||
} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
|
||||
renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
|
||||
} else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) {
|
||||
} else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
|
||||
return;
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
|
||||
@@ -25,6 +25,8 @@ MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : raste
|
||||
UpdatePageTableForVMA(initial_vma);
|
||||
}
|
||||
|
||||
MemoryManager::~MemoryManager() = default;
|
||||
|
||||
GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
|
||||
const u64 aligned_size{Common::AlignUp(size, page_size)};
|
||||
const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
|
||||
@@ -199,11 +201,11 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
|
||||
return {};
|
||||
}
|
||||
|
||||
bool MemoryManager::IsBlockContinous(const GPUVAddr start, const std::size_t size) {
|
||||
bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const {
|
||||
const GPUVAddr end = start + size;
|
||||
const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start));
|
||||
const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end));
|
||||
const std::size_t range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
|
||||
const auto range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
|
||||
return range == size;
|
||||
}
|
||||
|
||||
|
||||
@@ -47,7 +47,8 @@ struct VirtualMemoryArea {
|
||||
|
||||
class MemoryManager final {
|
||||
public:
|
||||
MemoryManager(VideoCore::RasterizerInterface& rasterizer);
|
||||
explicit MemoryManager(VideoCore::RasterizerInterface& rasterizer);
|
||||
~MemoryManager();
|
||||
|
||||
GPUVAddr AllocateSpace(u64 size, u64 align);
|
||||
GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
|
||||
@@ -65,18 +66,18 @@ public:
|
||||
u8* GetPointer(GPUVAddr addr);
|
||||
const u8* GetPointer(GPUVAddr addr) const;
|
||||
|
||||
// Returns true if the block is continous in host memory, false otherwise
|
||||
bool IsBlockContinous(const GPUVAddr start, const std::size_t size);
|
||||
/// Returns true if the block is continuous in host memory, false otherwise
|
||||
bool IsBlockContinuous(GPUVAddr start, std::size_t size) const;
|
||||
|
||||
/**
|
||||
* ReadBlock and WriteBlock are full read and write operations over virtual
|
||||
* GPU Memory. It's important to use these when GPU memory may not be continous
|
||||
* GPU Memory. It's important to use these when GPU memory may not be continuous
|
||||
* in the Host Memory counterpart. Note: This functions cause Host GPU Memory
|
||||
* Flushes and Invalidations, respectively to each operation.
|
||||
*/
|
||||
void ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const;
|
||||
void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size);
|
||||
void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size);
|
||||
void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
|
||||
void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
|
||||
void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
|
||||
|
||||
/**
|
||||
* ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
|
||||
@@ -88,9 +89,9 @@ public:
|
||||
* WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture
|
||||
* being flushed.
|
||||
*/
|
||||
void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const;
|
||||
void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size);
|
||||
void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size);
|
||||
void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
|
||||
void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
|
||||
void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
|
||||
|
||||
private:
|
||||
using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
|
||||
@@ -111,10 +112,10 @@ private:
|
||||
/**
|
||||
* Maps an unmanaged host memory pointer at a given address.
|
||||
*
|
||||
* @param target The guest address to start the mapping at.
|
||||
* @param memory The memory to be mapped.
|
||||
* @param size Size of the mapping.
|
||||
* @param state MemoryState tag to attach to the VMA.
|
||||
* @param target The guest address to start the mapping at.
|
||||
* @param memory The memory to be mapped.
|
||||
* @param size Size of the mapping in bytes.
|
||||
* @param backing_addr The base address of the range to back this mapping.
|
||||
*/
|
||||
VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr);
|
||||
|
||||
@@ -124,7 +125,7 @@ private:
|
||||
/// Converts a VMAHandle to a mutable VMAIter.
|
||||
VMAIter StripIterConstness(const VMAHandle& iter);
|
||||
|
||||
/// Marks as the specfied VMA as allocated.
|
||||
/// Marks as the specified VMA as allocated.
|
||||
VMAIter Allocate(VMAIter vma);
|
||||
|
||||
/**
|
||||
|
||||
@@ -37,9 +37,6 @@ public:
|
||||
/// Gets the size of the shader in guest memory, required for cache management
|
||||
virtual std::size_t GetSizeInBytes() const = 0;
|
||||
|
||||
/// Wriets any cached resources back to memory
|
||||
virtual void Flush() = 0;
|
||||
|
||||
/// Sets whether the cached object should be considered registered
|
||||
void SetIsRegistered(bool registered) {
|
||||
is_registered = registered;
|
||||
@@ -158,6 +155,8 @@ protected:
|
||||
return ++modified_ticks;
|
||||
}
|
||||
|
||||
virtual void FlushObjectInner(const T& object) = 0;
|
||||
|
||||
/// Flushes the specified object, updating appropriate cache state as needed
|
||||
void FlushObject(const T& object) {
|
||||
std::lock_guard lock{mutex};
|
||||
@@ -165,7 +164,7 @@ protected:
|
||||
if (!object->IsDirty()) {
|
||||
return;
|
||||
}
|
||||
object->Flush();
|
||||
FlushObjectInner(object);
|
||||
object->MarkAsModified(false, *this);
|
||||
}
|
||||
|
||||
|
||||
@@ -42,9 +42,6 @@ public:
|
||||
return alignment;
|
||||
}
|
||||
|
||||
// We do not have to flush this cache as things in it are never modified by us.
|
||||
void Flush() override {}
|
||||
|
||||
private:
|
||||
VAddr cpu_addr{};
|
||||
std::size_t size{};
|
||||
@@ -75,6 +72,9 @@ public:
|
||||
protected:
|
||||
void AlignBuffer(std::size_t alignment);
|
||||
|
||||
// We do not have to flush this cache as things in it are never modified by us.
|
||||
void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
|
||||
|
||||
private:
|
||||
OGLStreamBuffer stream_buffer;
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ public:
|
||||
/// Reloads the global region from guest memory
|
||||
void Reload(u32 size_);
|
||||
|
||||
void Flush() override;
|
||||
void Flush();
|
||||
|
||||
private:
|
||||
VAddr cpu_addr{};
|
||||
@@ -65,6 +65,11 @@ public:
|
||||
GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
|
||||
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
|
||||
|
||||
protected:
|
||||
void FlushObjectInner(const GlobalRegion& object) override {
|
||||
object->Flush();
|
||||
}
|
||||
|
||||
private:
|
||||
GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
|
||||
GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size);
|
||||
|
||||
@@ -261,8 +261,8 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
|
||||
// MakeQuadArray always generates u32 indexes
|
||||
params.index_format = GL_UNSIGNED_INT;
|
||||
params.count = (regs.vertex_buffer.count / 4) * 6;
|
||||
params.index_buffer_offset =
|
||||
primitive_assembler.MakeQuadArray(regs.vertex_buffer.first, params.count);
|
||||
params.index_buffer_offset = primitive_assembler.MakeQuadArray(
|
||||
regs.vertex_buffer.first, regs.vertex_buffer.count);
|
||||
}
|
||||
return params;
|
||||
}
|
||||
@@ -305,6 +305,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||
case Maxwell::ShaderProgram::Geometry:
|
||||
shader_program_manager->UseTrivialGeometryShader();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
@@ -920,8 +922,8 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
|
||||
viewport.y = viewport_rect.bottom;
|
||||
viewport.width = viewport_rect.GetWidth();
|
||||
viewport.height = viewport_rect.GetHeight();
|
||||
viewport.depth_range_far = regs.viewports[i].depth_range_far;
|
||||
viewport.depth_range_near = regs.viewports[i].depth_range_near;
|
||||
viewport.depth_range_far = src.depth_range_far;
|
||||
viewport.depth_range_near = src.depth_range_near;
|
||||
}
|
||||
state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0;
|
||||
state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0;
|
||||
|
||||
@@ -628,9 +628,11 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
|
||||
void CachedSurface::LoadGLBuffer() {
|
||||
void CachedSurface::LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) {
|
||||
MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
|
||||
gl_buffer.resize(params.max_mip_level);
|
||||
auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
|
||||
if (gl_buffer.size() < params.max_mip_level)
|
||||
gl_buffer.resize(params.max_mip_level);
|
||||
for (u32 i = 0; i < params.max_mip_level; i++)
|
||||
gl_buffer[i].resize(params.GetMipmapSizeGL(i));
|
||||
if (params.is_tiled) {
|
||||
@@ -671,13 +673,13 @@ void CachedSurface::LoadGLBuffer() {
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
|
||||
void CachedSurface::FlushGLBuffer() {
|
||||
void CachedSurface::FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) {
|
||||
MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
|
||||
|
||||
ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented");
|
||||
|
||||
auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
|
||||
// OpenGL temporary buffer needs to be big enough to store raw texture size
|
||||
gl_buffer.resize(1);
|
||||
gl_buffer[0].resize(GetSizeInBytes());
|
||||
|
||||
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
|
||||
@@ -713,10 +715,12 @@ void CachedSurface::FlushGLBuffer() {
|
||||
}
|
||||
}
|
||||
|
||||
void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
|
||||
GLuint draw_fb_handle) {
|
||||
void CachedSurface::UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
|
||||
GLuint read_fb_handle, GLuint draw_fb_handle) {
|
||||
const auto& rect{params.GetRect(mip_map)};
|
||||
|
||||
auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
|
||||
|
||||
// Load data from memory to the surface
|
||||
const auto x0 = static_cast<GLint>(rect.left);
|
||||
const auto y0 = static_cast<GLint>(rect.bottom);
|
||||
@@ -801,7 +805,6 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
|
||||
tuple.type, &gl_buffer[mip_map][buffer_offset]);
|
||||
break;
|
||||
case SurfaceTarget::TextureCubemap: {
|
||||
std::size_t start = buffer_offset;
|
||||
for (std::size_t face = 0; face < params.depth; ++face) {
|
||||
glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face),
|
||||
static_cast<GLsizei>(rect.GetWidth()),
|
||||
@@ -845,11 +848,12 @@ void CachedSurface::EnsureTextureDiscrepantView() {
|
||||
}
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
|
||||
void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
|
||||
void CachedSurface::UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem,
|
||||
GLuint read_fb_handle, GLuint draw_fb_handle) {
|
||||
MICROPROFILE_SCOPE(OpenGL_TextureUL);
|
||||
|
||||
for (u32 i = 0; i < params.max_mip_level; i++)
|
||||
UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle);
|
||||
UploadGLMipmapTexture(res_cache_tmp_mem, i, read_fb_handle, draw_fb_handle);
|
||||
}
|
||||
|
||||
void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
|
||||
@@ -929,8 +933,8 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
|
||||
}
|
||||
|
||||
void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
|
||||
surface->LoadGLBuffer();
|
||||
surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
|
||||
surface->LoadGLBuffer(temporal_memory);
|
||||
surface->UploadGLTexture(temporal_memory, read_framebuffer.handle, draw_framebuffer.handle);
|
||||
surface->MarkAsModified(false, *this);
|
||||
surface->MarkForReload(false);
|
||||
}
|
||||
|
||||
@@ -355,6 +355,12 @@ namespace OpenGL {
|
||||
|
||||
class RasterizerOpenGL;
|
||||
|
||||
// This is used to store temporary big buffers,
|
||||
// instead of creating/destroying all the time
|
||||
struct RasterizerTemporaryMemory {
|
||||
std::vector<std::vector<u8>> gl_buffer;
|
||||
};
|
||||
|
||||
class CachedSurface final : public RasterizerCacheObject {
|
||||
public:
|
||||
explicit CachedSurface(const SurfaceParams& params);
|
||||
@@ -371,10 +377,6 @@ public:
|
||||
return memory_size;
|
||||
}
|
||||
|
||||
void Flush() override {
|
||||
FlushGLBuffer();
|
||||
}
|
||||
|
||||
const OGLTexture& Texture() const {
|
||||
return texture;
|
||||
}
|
||||
@@ -397,11 +399,12 @@ public:
|
||||
}
|
||||
|
||||
// Read/Write data in Switch memory to/from gl_buffer
|
||||
void LoadGLBuffer();
|
||||
void FlushGLBuffer();
|
||||
void LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);
|
||||
void FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);
|
||||
|
||||
// Upload data in gl_buffer to this surface's texture
|
||||
void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
|
||||
void UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, GLuint read_fb_handle,
|
||||
GLuint draw_fb_handle);
|
||||
|
||||
void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
|
||||
Tegra::Texture::SwizzleSource swizzle_y,
|
||||
@@ -429,13 +432,13 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
|
||||
void UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
|
||||
GLuint read_fb_handle, GLuint draw_fb_handle);
|
||||
|
||||
void EnsureTextureDiscrepantView();
|
||||
|
||||
OGLTexture texture;
|
||||
OGLTexture discrepant_view;
|
||||
std::vector<std::vector<u8>> gl_buffer;
|
||||
SurfaceParams params{};
|
||||
GLenum gl_target{};
|
||||
GLenum gl_internal_format{};
|
||||
@@ -473,6 +476,11 @@ public:
|
||||
void SignalPreDrawCall();
|
||||
void SignalPostDrawCall();
|
||||
|
||||
protected:
|
||||
/// Flushes a single cached surface by calling its FlushGLBuffer with this cache's
/// temporal_memory member, so the flush goes through the cache-owned temporary buffers.
void FlushObjectInner(const Surface& object) override {
|
||||
object->FlushGLBuffer(temporal_memory);
|
||||
}
|
||||
|
||||
private:
|
||||
void LoadSurface(const Surface& surface);
|
||||
Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true);
|
||||
@@ -519,6 +527,8 @@ private:
|
||||
std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
|
||||
Surface last_depth_buffer;
|
||||
|
||||
RasterizerTemporaryMemory temporal_memory;
|
||||
|
||||
using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
|
||||
using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
|
||||
|
||||
|
||||
@@ -345,7 +345,7 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
|
||||
|
||||
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
|
||||
const Device& device)
|
||||
: RasterizerCache{rasterizer}, disk_cache{system}, device{device} {}
|
||||
: RasterizerCache{rasterizer}, device{device}, disk_cache{system} {}
|
||||
|
||||
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback) {
|
||||
@@ -363,6 +363,10 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||
if (stop_loading)
|
||||
return;
|
||||
|
||||
// Track if precompiled cache was altered during loading to know if we have to serialize the
|
||||
// virtual precompiled cache file back to the hard drive
|
||||
bool precompiled_cache_altered = false;
|
||||
|
||||
// Build shaders
|
||||
if (callback)
|
||||
callback(VideoCore::LoadCallbackStage::Build, 0, usages.size());
|
||||
@@ -384,6 +388,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||
if (!shader) {
|
||||
// Invalidate the precompiled cache if a dumped shader was rejected
|
||||
disk_cache.InvalidatePrecompiled();
|
||||
precompiled_cache_altered = true;
|
||||
dumps.clear();
|
||||
}
|
||||
}
|
||||
@@ -405,8 +410,13 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||
if (dumps.find(usage) == dumps.end()) {
|
||||
const auto& program = precompiled_programs.at(usage);
|
||||
disk_cache.SaveDump(usage, program->handle);
|
||||
precompiled_cache_altered = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (precompiled_cache_altered) {
|
||||
disk_cache.SaveVirtualPrecompiledFile();
|
||||
}
|
||||
}
|
||||
|
||||
CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
|
||||
|
||||
@@ -57,9 +57,6 @@ public:
|
||||
return shader_length;
|
||||
}
|
||||
|
||||
// We do not have to flush this cache as things in it are never modified by us.
|
||||
void Flush() override {}
|
||||
|
||||
/// Gets the shader entries for the shader
|
||||
const GLShader::ShaderEntries& GetShaderEntries() const {
|
||||
return entries;
|
||||
@@ -123,6 +120,10 @@ public:
|
||||
/// Gets the current specified shader stage program
|
||||
Shader GetStageProgram(Maxwell::ShaderProgram program);
|
||||
|
||||
protected:
|
||||
// We do not have to flush this cache as things in it are never modified by us.
|
||||
void FlushObjectInner(const Shader& object) override {}
|
||||
|
||||
private:
|
||||
std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders(
|
||||
const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
|
||||
|
||||
@@ -871,17 +871,6 @@ private:
|
||||
return {};
|
||||
}
|
||||
|
||||
std::string Composite(Operation operation) {
|
||||
std::string value = "vec4(";
|
||||
for (std::size_t i = 0; i < 4; ++i) {
|
||||
value += Visit(operation[i]);
|
||||
if (i < 3)
|
||||
value += ", ";
|
||||
}
|
||||
value += ')';
|
||||
return value;
|
||||
}
|
||||
|
||||
template <Type type>
|
||||
std::string Add(Operation operation) {
|
||||
return GenerateBinaryInfix(operation, "+", type, type, type);
|
||||
|
||||
@@ -104,7 +104,8 @@ bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
|
||||
ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system)
|
||||
: system{system}, precompiled_cache_virtual_file_offset{0} {}
|
||||
|
||||
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
|
||||
ShaderDiskCacheOpenGL::LoadTransferable() {
|
||||
@@ -177,6 +178,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
return {{raws, usages}};
|
||||
}
|
||||
|
||||
@@ -208,59 +210,64 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() {
|
||||
std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
|
||||
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
|
||||
ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
|
||||
// Read compressed file from disk and decompress to virtual precompiled cache file
|
||||
std::vector<u8> compressed(file.GetSize());
|
||||
file.ReadBytes(compressed.data(), compressed.size());
|
||||
const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed);
|
||||
SaveArrayToPrecompiled(decompressed.data(), decompressed.size());
|
||||
precompiled_cache_virtual_file_offset = 0;
|
||||
|
||||
ShaderCacheVersionHash file_hash{};
|
||||
if (file.ReadArray(file_hash.data(), file_hash.size()) != file_hash.size()) {
|
||||
if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
|
||||
precompiled_cache_virtual_file_offset = 0;
|
||||
return {};
|
||||
}
|
||||
if (GetShaderCacheVersionHash() != file_hash) {
|
||||
LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
|
||||
precompiled_cache_virtual_file_offset = 0;
|
||||
return {};
|
||||
}
|
||||
|
||||
std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
|
||||
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> dumps;
|
||||
while (file.Tell() < file.GetSize()) {
|
||||
while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
|
||||
PrecompiledEntryKind kind{};
|
||||
if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
|
||||
if (!LoadObjectFromPrecompiled(kind)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
switch (kind) {
|
||||
case PrecompiledEntryKind::Decompiled: {
|
||||
u64 unique_identifier{};
|
||||
if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64))
|
||||
if (!LoadObjectFromPrecompiled(unique_identifier)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
const auto entry = LoadDecompiledEntry(file);
|
||||
if (!entry)
|
||||
const auto entry = LoadDecompiledEntry();
|
||||
if (!entry) {
|
||||
return {};
|
||||
}
|
||||
decompiled.insert({unique_identifier, std::move(*entry)});
|
||||
break;
|
||||
}
|
||||
case PrecompiledEntryKind::Dump: {
|
||||
ShaderDiskCacheUsage usage;
|
||||
if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage))
|
||||
if (!LoadObjectFromPrecompiled(usage)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
ShaderDiskCacheDump dump;
|
||||
if (file.ReadBytes(&dump.binary_format, sizeof(u32)) != sizeof(u32))
|
||||
if (!LoadObjectFromPrecompiled(dump.binary_format)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
u32 binary_length{};
|
||||
u32 compressed_size{};
|
||||
if (file.ReadBytes(&binary_length, sizeof(u32)) != sizeof(u32) ||
|
||||
file.ReadBytes(&compressed_size, sizeof(u32)) != sizeof(u32)) {
|
||||
if (!LoadObjectFromPrecompiled(binary_length)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
std::vector<u8> compressed_binary(compressed_size);
|
||||
if (file.ReadArray(compressed_binary.data(), compressed_binary.size()) !=
|
||||
compressed_binary.size()) {
|
||||
return {};
|
||||
}
|
||||
|
||||
dump.binary = Common::Compression::DecompressDataZSTD(compressed_binary);
|
||||
if (dump.binary.empty()) {
|
||||
dump.binary.resize(binary_length);
|
||||
if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -274,45 +281,41 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
|
||||
return {{decompiled, dumps}};
|
||||
}
|
||||
|
||||
std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry(
|
||||
FileUtil::IOFile& file) {
|
||||
std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry() {
|
||||
u32 code_size{};
|
||||
u32 compressed_code_size{};
|
||||
if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
|
||||
file.ReadBytes(&compressed_code_size, sizeof(u32)) != sizeof(u32)) {
|
||||
if (!LoadObjectFromPrecompiled(code_size)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
std::vector<u8> compressed_code(compressed_code_size);
|
||||
if (file.ReadArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
|
||||
std::vector<u8> code(code_size);
|
||||
if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
|
||||
return {};
|
||||
}
|
||||
|
||||
const std::vector<u8> code = Common::Compression::DecompressDataZSTD(compressed_code);
|
||||
if (code.empty()) {
|
||||
return {};
|
||||
}
|
||||
ShaderDiskCacheDecompiled entry;
|
||||
entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size);
|
||||
|
||||
u32 const_buffers_count{};
|
||||
if (file.ReadBytes(&const_buffers_count, sizeof(u32)) != sizeof(u32))
|
||||
if (!LoadObjectFromPrecompiled(const_buffers_count)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < const_buffers_count; ++i) {
|
||||
u32 max_offset{};
|
||||
u32 index{};
|
||||
u8 is_indirect{};
|
||||
if (file.ReadBytes(&max_offset, sizeof(u32)) != sizeof(u32) ||
|
||||
file.ReadBytes(&index, sizeof(u32)) != sizeof(u32) ||
|
||||
file.ReadBytes(&is_indirect, sizeof(u8)) != sizeof(u8)) {
|
||||
if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) ||
|
||||
!LoadObjectFromPrecompiled(is_indirect)) {
|
||||
return {};
|
||||
}
|
||||
entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index);
|
||||
}
|
||||
|
||||
u32 samplers_count{};
|
||||
if (file.ReadBytes(&samplers_count, sizeof(u32)) != sizeof(u32))
|
||||
if (!LoadObjectFromPrecompiled(samplers_count)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < samplers_count; ++i) {
|
||||
u64 offset{};
|
||||
u64 index{};
|
||||
@@ -320,12 +323,9 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
|
||||
u8 is_array{};
|
||||
u8 is_shadow{};
|
||||
u8 is_bindless{};
|
||||
if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) ||
|
||||
file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) ||
|
||||
file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) ||
|
||||
file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) ||
|
||||
file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8) ||
|
||||
file.ReadBytes(&is_bindless, sizeof(u8)) != sizeof(u8)) {
|
||||
if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
|
||||
!LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) ||
|
||||
!LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) {
|
||||
return {};
|
||||
}
|
||||
entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset),
|
||||
@@ -335,17 +335,17 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
|
||||
}
|
||||
|
||||
u32 global_memory_count{};
|
||||
if (file.ReadBytes(&global_memory_count, sizeof(u32)) != sizeof(u32))
|
||||
if (!LoadObjectFromPrecompiled(global_memory_count)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < global_memory_count; ++i) {
|
||||
u32 cbuf_index{};
|
||||
u32 cbuf_offset{};
|
||||
u8 is_read{};
|
||||
u8 is_written{};
|
||||
if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) ||
|
||||
file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32) ||
|
||||
file.ReadBytes(&is_read, sizeof(u8)) != sizeof(u8) ||
|
||||
file.ReadBytes(&is_written, sizeof(u8)) != sizeof(u8)) {
|
||||
if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) ||
|
||||
!LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
|
||||
return {};
|
||||
}
|
||||
entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0,
|
||||
@@ -354,74 +354,81 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
|
||||
|
||||
for (auto& clip_distance : entry.entries.clip_distances) {
|
||||
u8 clip_distance_raw{};
|
||||
if (file.ReadBytes(&clip_distance_raw, sizeof(u8)) != sizeof(u8))
|
||||
if (!LoadObjectFromPrecompiled(clip_distance_raw))
|
||||
return {};
|
||||
clip_distance = clip_distance_raw != 0;
|
||||
}
|
||||
|
||||
u64 shader_length{};
|
||||
if (file.ReadBytes(&shader_length, sizeof(u64)) != sizeof(u64))
|
||||
if (!LoadObjectFromPrecompiled(shader_length)) {
|
||||
return {};
|
||||
}
|
||||
|
||||
entry.entries.shader_length = static_cast<std::size_t>(shader_length);
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier,
|
||||
const std::string& code,
|
||||
const std::vector<u8>& compressed_code,
|
||||
bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std::string& code,
|
||||
const GLShader::ShaderEntries& entries) {
|
||||
if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Decompiled)) != 1 ||
|
||||
file.WriteObject(unique_identifier) != 1 ||
|
||||
file.WriteObject(static_cast<u32>(code.size())) != 1 ||
|
||||
file.WriteObject(static_cast<u32>(compressed_code.size())) != 1 ||
|
||||
file.WriteArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
|
||||
if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Decompiled)) ||
|
||||
!SaveObjectToPrecompiled(unique_identifier) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u32>(code.size())) ||
|
||||
!SaveArrayToPrecompiled(code.data(), code.size())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (file.WriteObject(static_cast<u32>(entries.const_buffers.size())) != 1)
|
||||
if (!SaveObjectToPrecompiled(static_cast<u32>(entries.const_buffers.size()))) {
|
||||
return false;
|
||||
}
|
||||
for (const auto& cbuf : entries.const_buffers) {
|
||||
if (file.WriteObject(static_cast<u32>(cbuf.GetMaxOffset())) != 1 ||
|
||||
file.WriteObject(static_cast<u32>(cbuf.GetIndex())) != 1 ||
|
||||
file.WriteObject(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0)) != 1) {
|
||||
if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (file.WriteObject(static_cast<u32>(entries.samplers.size())) != 1)
|
||||
if (!SaveObjectToPrecompiled(static_cast<u32>(entries.samplers.size()))) {
|
||||
return false;
|
||||
}
|
||||
for (const auto& sampler : entries.samplers) {
|
||||
if (file.WriteObject(static_cast<u64>(sampler.GetOffset())) != 1 ||
|
||||
file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 ||
|
||||
file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 ||
|
||||
file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 ||
|
||||
file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1 ||
|
||||
file.WriteObject(static_cast<u8>(sampler.IsBindless() ? 1 : 0)) != 1) {
|
||||
if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u8>(sampler.IsArray() ? 1 : 0)) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u8>(sampler.IsBindless() ? 1 : 0))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (file.WriteObject(static_cast<u32>(entries.global_memory_entries.size())) != 1)
|
||||
if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) {
|
||||
return false;
|
||||
}
|
||||
for (const auto& gmem : entries.global_memory_entries) {
|
||||
if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 ||
|
||||
file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1 ||
|
||||
file.WriteObject(static_cast<u8>(gmem.IsRead() ? 1 : 0)) != 1 ||
|
||||
file.WriteObject(static_cast<u8>(gmem.IsWritten() ? 1 : 0)) != 1) {
|
||||
if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u8>(gmem.IsRead() ? 1 : 0)) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u8>(gmem.IsWritten() ? 1 : 0))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
for (const bool clip_distance : entries.clip_distances) {
|
||||
if (file.WriteObject(static_cast<u8>(clip_distance ? 1 : 0)) != 1)
|
||||
if (!SaveObjectToPrecompiled(static_cast<u8>(clip_distance ? 1 : 0))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return file.WriteObject(static_cast<u64>(entries.shader_length)) == 1;
|
||||
if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void ShaderDiskCacheOpenGL::InvalidateTransferable() const {
|
||||
void ShaderDiskCacheOpenGL::InvalidateTransferable() {
|
||||
if (!FileUtil::Delete(GetTransferablePath())) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
|
||||
GetTransferablePath());
|
||||
@@ -429,7 +436,10 @@ void ShaderDiskCacheOpenGL::InvalidateTransferable() const {
|
||||
InvalidatePrecompiled();
|
||||
}
|
||||
|
||||
void ShaderDiskCacheOpenGL::InvalidatePrecompiled() const {
|
||||
void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
|
||||
// Clear the virtual precompiled cache file
|
||||
precompiled_cache_virtual_file.Resize(0);
|
||||
|
||||
if (!FileUtil::Delete(GetPrecompiledPath())) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath());
|
||||
}
|
||||
@@ -465,7 +475,10 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
|
||||
ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
|
||||
|
||||
auto& usages{it->second};
|
||||
ASSERT(usages.find(usage) == usages.end());
|
||||
if (usages.find(usage) != usages.end()) {
|
||||
// Skip this variant since the shader is already stored.
|
||||
return;
|
||||
}
|
||||
usages.insert(usage);
|
||||
|
||||
FileUtil::IOFile file = AppendTransferableFile();
|
||||
@@ -485,22 +498,13 @@ void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::str
|
||||
if (!IsUsable())
|
||||
return;
|
||||
|
||||
const std::vector<u8> compressed_code{Common::Compression::CompressDataZSTDDefault(
|
||||
reinterpret_cast<const u8*>(code.data()), code.size())};
|
||||
if (compressed_code.empty()) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}",
|
||||
unique_identifier);
|
||||
return;
|
||||
if (precompiled_cache_virtual_file.GetSize() == 0) {
|
||||
SavePrecompiledHeaderToVirtualPrecompiledCache();
|
||||
}
|
||||
|
||||
FileUtil::IOFile file = AppendPrecompiledFile();
|
||||
if (!file.IsOpen())
|
||||
return;
|
||||
|
||||
if (!SaveDecompiledFile(file, unique_identifier, code, compressed_code, entries)) {
|
||||
if (!SaveDecompiledFile(unique_identifier, code, entries)) {
|
||||
LOG_ERROR(Render_OpenGL,
|
||||
"Failed to save decompiled entry to the precompiled file - removing");
|
||||
file.Close();
|
||||
InvalidatePrecompiled();
|
||||
}
|
||||
}
|
||||
@@ -516,28 +520,13 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
|
||||
std::vector<u8> binary(binary_length);
|
||||
glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
|
||||
|
||||
const std::vector<u8> compressed_binary =
|
||||
Common::Compression::CompressDataZSTDDefault(binary.data(), binary.size());
|
||||
|
||||
if (compressed_binary.empty()) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}",
|
||||
usage.unique_identifier);
|
||||
return;
|
||||
}
|
||||
|
||||
FileUtil::IOFile file = AppendPrecompiledFile();
|
||||
if (!file.IsOpen())
|
||||
return;
|
||||
|
||||
if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Dump)) != 1 ||
|
||||
file.WriteObject(usage) != 1 || file.WriteObject(static_cast<u32>(binary_format)) != 1 ||
|
||||
file.WriteObject(static_cast<u32>(binary_length)) != 1 ||
|
||||
file.WriteObject(static_cast<u32>(compressed_binary.size())) != 1 ||
|
||||
file.WriteArray(compressed_binary.data(), compressed_binary.size()) !=
|
||||
compressed_binary.size()) {
|
||||
if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Dump)) ||
|
||||
!SaveObjectToPrecompiled(usage) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
|
||||
!SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
|
||||
!SaveArrayToPrecompiled(binary.data(), binary.size())) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing",
|
||||
usage.unique_identifier);
|
||||
file.Close();
|
||||
InvalidatePrecompiled();
|
||||
return;
|
||||
}
|
||||
@@ -570,28 +559,33 @@ FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
|
||||
return file;
|
||||
}
|
||||
|
||||
FileUtil::IOFile ShaderDiskCacheOpenGL::AppendPrecompiledFile() const {
|
||||
if (!EnsureDirectories())
|
||||
return {};
|
||||
/// Writes the shader cache version hash (the precompiled header) into the virtual precompiled
/// cache file at the current virtual file offset. A failed write is only logged; nothing is
/// reported back to the caller.
void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
|
||||
const auto hash{GetShaderCacheVersionHash()};
|
||||
if (!SaveArrayToPrecompiled(hash.data(), hash.size())) {
|
||||
LOG_ERROR(
|
||||
Render_OpenGL,
|
||||
"Failed to write precompiled cache version hash to virtual precompiled cache file");
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
|
||||
precompiled_cache_virtual_file_offset = 0;
|
||||
const std::vector<u8>& uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
|
||||
const std::vector<u8>& compressed =
|
||||
Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
|
||||
|
||||
const auto precompiled_path{GetPrecompiledPath()};
|
||||
const bool existed = FileUtil::Exists(precompiled_path);
|
||||
FileUtil::IOFile file(precompiled_path, "wb");
|
||||
|
||||
FileUtil::IOFile file(precompiled_path, "ab");
|
||||
if (!file.IsOpen()) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path);
|
||||
return {};
|
||||
return;
|
||||
}
|
||||
|
||||
if (!existed || file.GetSize() == 0) {
|
||||
const auto hash{GetShaderCacheVersionHash()};
|
||||
if (file.WriteArray(hash.data(), hash.size()) != hash.size()) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version hash in path={}",
|
||||
precompiled_path);
|
||||
return {};
|
||||
}
|
||||
if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) {
|
||||
LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
|
||||
precompiled_path);
|
||||
return;
|
||||
}
|
||||
return file;
|
||||
}
|
||||
|
||||
bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "core/file_sys/vfs_vector.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_gen.h"
|
||||
|
||||
@@ -172,10 +173,10 @@ public:
|
||||
LoadPrecompiled();
|
||||
|
||||
/// Removes the transferable (and precompiled) cache file.
|
||||
void InvalidateTransferable() const;
|
||||
void InvalidateTransferable();
|
||||
|
||||
/// Removes the precompiled cache file.
|
||||
void InvalidatePrecompiled() const;
|
||||
/// Removes the precompiled cache file and clears virtual precompiled cache file.
|
||||
void InvalidatePrecompiled();
|
||||
|
||||
/// Saves a raw dump to the transferable file. Checks for collisions.
|
||||
void SaveRaw(const ShaderDiskCacheRaw& entry);
|
||||
@@ -190,18 +191,21 @@ public:
|
||||
/// Saves a dump entry to the precompiled file. Does not check for collisions.
|
||||
void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);
|
||||
|
||||
/// Serializes virtual precompiled shader cache file to real file
|
||||
void SaveVirtualPrecompiledFile();
|
||||
|
||||
private:
|
||||
/// Loads the precompiled cache file. Returns empty on failure.
|
||||
std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
|
||||
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
|
||||
LoadPrecompiledFile(FileUtil::IOFile& file);
|
||||
|
||||
/// Loads a decompiled cache entry from the passed file. Returns empty on failure.
|
||||
std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry(FileUtil::IOFile& file);
|
||||
/// Loads a decompiled cache entry from precompiled_cache_virtual_file. Returns empty on
|
||||
/// failure.
|
||||
std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry();
|
||||
|
||||
/// Saves a decompiled entry to the passed file. Returns true on success.
|
||||
bool SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier, const std::string& code,
|
||||
const std::vector<u8>& compressed_code,
|
||||
bool SaveDecompiledFile(u64 unique_identifier, const std::string& code,
|
||||
const GLShader::ShaderEntries& entries);
|
||||
|
||||
/// Returns if the cache can be used
|
||||
@@ -210,8 +214,8 @@ private:
|
||||
/// Opens the current game's transferable file and writes its header if it doesn't exist
|
||||
FileUtil::IOFile AppendTransferableFile() const;
|
||||
|
||||
/// Opens current game's precompiled file and write it's header if it doesn't exist
|
||||
FileUtil::IOFile AppendPrecompiledFile() const;
|
||||
/// Saves the precompiled header to precompiled_cache_virtual_file
|
||||
void SavePrecompiledHeaderToVirtualPrecompiledCache();
|
||||
|
||||
/// Create shader disk cache directories. Returns true on success.
|
||||
bool EnsureDirectories() const;
|
||||
@@ -234,10 +238,42 @@ private:
|
||||
/// Get current game's title id
|
||||
std::string GetTitleID() const;
|
||||
|
||||
/// Writes `length` elements of type T from `data` into the virtual precompiled cache file,
/// starting at precompiled_cache_virtual_file_offset, then advances that offset by the value
/// WriteArray returned.
/// @returns true when the value returned by WriteArray equals sizeof(T) * length.
/// NOTE(review): this assumes WriteArray reports a byte count rather than an element count -
/// confirm against the VFS interface.
template <typename T>
|
||||
bool SaveArrayToPrecompiled(const T* data, std::size_t length) {
|
||||
const std::size_t write_length = precompiled_cache_virtual_file.WriteArray(
|
||||
data, length, precompiled_cache_virtual_file_offset);
|
||||
// The offset moves by whatever was actually written, even on a short write.
precompiled_cache_virtual_file_offset += write_length;
|
||||
return write_length == sizeof(T) * length;
|
||||
}
|
||||
|
||||
/// Reads `length` elements of type T from the virtual precompiled cache file into `data`,
/// starting at precompiled_cache_virtual_file_offset, then advances that offset by the value
/// ReadArray returned.
/// @returns true when the value returned by ReadArray equals sizeof(T) * length.
/// NOTE(review): this assumes ReadArray reports a byte count rather than an element count -
/// confirm against the VFS interface.
template <typename T>
|
||||
bool LoadArrayFromPrecompiled(T* data, std::size_t length) {
|
||||
const std::size_t read_length = precompiled_cache_virtual_file.ReadArray(
|
||||
data, length, precompiled_cache_virtual_file_offset);
|
||||
// The offset moves by whatever was actually read, even on a short read.
precompiled_cache_virtual_file_offset += read_length;
|
||||
return read_length == sizeof(T) * length;
|
||||
}
|
||||
|
||||
/// Convenience wrapper: writes a single object to the virtual precompiled cache file by
/// forwarding to SaveArrayToPrecompiled with a length of 1. Returns that call's result.
template <typename T>
|
||||
bool SaveObjectToPrecompiled(const T& object) {
|
||||
return SaveArrayToPrecompiled(&object, 1);
|
||||
}
|
||||
|
||||
/// Convenience wrapper: reads a single object from the virtual precompiled cache file by
/// forwarding to LoadArrayFromPrecompiled with a length of 1. Returns that call's result.
template <typename T>
|
||||
bool LoadObjectFromPrecompiled(T& object) {
|
||||
return LoadArrayFromPrecompiled(&object, 1);
|
||||
}
|
||||
|
||||
// Core system
|
||||
Core::System& system;
|
||||
// Stored transferable shaders
|
||||
std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
|
||||
// Stores the whole precompiled cache, which will be read from or saved to the precompiled cache
|
||||
// file
|
||||
FileSys::VectorVfsFile precompiled_cache_virtual_file;
|
||||
// Stores the current offset of the precompiled cache file for IO purposes
|
||||
std::size_t precompiled_cache_virtual_file_offset;
|
||||
|
||||
// The cache has been loaded at boot
|
||||
bool tried_to_load{};
|
||||
};
|
||||
|
||||
@@ -27,8 +27,7 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
|
||||
switch (attrib.type) {
|
||||
case Maxwell::VertexAttribute::Type::UnsignedInt:
|
||||
case Maxwell::VertexAttribute::Type::UnsignedNorm: {
|
||||
|
||||
case Maxwell::VertexAttribute::Type::UnsignedNorm:
|
||||
switch (attrib.size) {
|
||||
case Maxwell::VertexAttribute::Size::Size_8:
|
||||
case Maxwell::VertexAttribute::Size::Size_8_8:
|
||||
@@ -47,16 +46,13 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
|
||||
return GL_UNSIGNED_INT;
|
||||
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
|
||||
return GL_UNSIGNED_INT_2_10_10_10_REV;
|
||||
default:
|
||||
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
|
||||
UNREACHABLE();
|
||||
return {};
|
||||
}
|
||||
|
||||
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
|
||||
UNREACHABLE();
|
||||
return {};
|
||||
}
|
||||
|
||||
case Maxwell::VertexAttribute::Type::SignedInt:
|
||||
case Maxwell::VertexAttribute::Type::SignedNorm: {
|
||||
|
||||
case Maxwell::VertexAttribute::Type::SignedNorm:
|
||||
switch (attrib.size) {
|
||||
case Maxwell::VertexAttribute::Size::Size_8:
|
||||
case Maxwell::VertexAttribute::Size::Size_8_8:
|
||||
@@ -75,14 +71,12 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
|
||||
return GL_INT;
|
||||
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
|
||||
return GL_INT_2_10_10_10_REV;
|
||||
default:
|
||||
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
|
||||
UNREACHABLE();
|
||||
return {};
|
||||
}
|
||||
|
||||
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
|
||||
UNREACHABLE();
|
||||
return {};
|
||||
}
|
||||
|
||||
case Maxwell::VertexAttribute::Type::Float: {
|
||||
case Maxwell::VertexAttribute::Type::Float:
|
||||
switch (attrib.size) {
|
||||
case Maxwell::VertexAttribute::Size::Size_16:
|
||||
case Maxwell::VertexAttribute::Size::Size_16_16:
|
||||
@@ -94,13 +88,16 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
|
||||
case Maxwell::VertexAttribute::Size::Size_32_32_32:
|
||||
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
|
||||
return GL_FLOAT;
|
||||
default:
|
||||
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
|
||||
UNREACHABLE();
|
||||
return {};
|
||||
}
|
||||
default:
|
||||
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
|
||||
UNREACHABLE();
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
|
||||
UNREACHABLE();
|
||||
return {};
|
||||
}
|
||||
|
||||
inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
|
||||
@@ -129,10 +126,11 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
|
||||
return GL_TRIANGLES;
|
||||
case Maxwell::PrimitiveTopology::TriangleStrip:
|
||||
return GL_TRIANGLE_STRIP;
|
||||
default:
|
||||
LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
|
||||
UNREACHABLE();
|
||||
return {};
|
||||
}
|
||||
LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
|
||||
UNREACHABLE();
|
||||
return {};
|
||||
}
|
||||
|
||||
inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
|
||||
@@ -186,9 +184,10 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
|
||||
} else {
|
||||
return GL_MIRROR_CLAMP_TO_EDGE;
|
||||
}
|
||||
default:
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
|
||||
return GL_REPEAT;
|
||||
}
|
||||
LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
|
||||
return GL_REPEAT;
|
||||
}
|
||||
|
||||
inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
|
||||
|
||||
@@ -62,9 +62,10 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
|
||||
case Tegra::Texture::WrapMode::MirrorOnceBorder:
|
||||
UNIMPLEMENTED();
|
||||
return vk::SamplerAddressMode::eMirrorClampToEdge;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
|
||||
return {};
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
|
||||
return {};
|
||||
}
|
||||
|
||||
vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
|
||||
@@ -225,9 +226,10 @@ vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
|
||||
return vk::PrimitiveTopology::eTriangleList;
|
||||
case Maxwell::PrimitiveTopology::TriangleStrip:
|
||||
return vk::PrimitiveTopology::eTriangleStrip;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
|
||||
return {};
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
|
||||
return {};
|
||||
}
|
||||
|
||||
vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
|
||||
|
||||
@@ -49,9 +49,6 @@ public:
|
||||
return alignment;
|
||||
}
|
||||
|
||||
// We do not have to flush this cache as things in it are never modified by us.
|
||||
void Flush() override {}
|
||||
|
||||
private:
|
||||
VAddr cpu_addr{};
|
||||
std::size_t size{};
|
||||
@@ -87,6 +84,10 @@ public:
|
||||
return buffer_handle;
|
||||
}
|
||||
|
||||
protected:
|
||||
// We do not have to flush this cache as things in it are never modified by us.
|
||||
void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
|
||||
|
||||
private:
|
||||
void AlignBuffer(std::size_t alignment);
|
||||
|
||||
|
||||
@@ -315,7 +315,6 @@ private:
|
||||
constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry",
|
||||
"overflow"};
|
||||
for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
|
||||
const auto flag_code = static_cast<InternalFlag>(flag);
|
||||
const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
|
||||
internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
|
||||
}
|
||||
|
||||
@@ -116,6 +116,8 @@ ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {
|
||||
// Continue scanning for an exit method.
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return exit_method = ExitMethod::AlwaysReturn;
|
||||
@@ -206,4 +208,4 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
|
||||
return pc + 1;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using Tegra::Shader::HalfType;
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
|
||||
@@ -22,7 +23,6 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
|
||||
LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
|
||||
}
|
||||
}
|
||||
UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented");
|
||||
|
||||
const bool negate_a =
|
||||
opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
|
||||
@@ -32,35 +32,37 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
|
||||
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
|
||||
op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a);
|
||||
|
||||
Node op_b = [&]() {
|
||||
auto [type_b, op_b] = [&]() -> std::tuple<HalfType, Node> {
|
||||
switch (opcode->get().GetId()) {
|
||||
case OpCode::Id::HADD2_C:
|
||||
case OpCode::Id::HMUL2_C:
|
||||
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
|
||||
return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
|
||||
case OpCode::Id::HADD2_R:
|
||||
case OpCode::Id::HMUL2_R:
|
||||
return GetRegister(instr.gpr20);
|
||||
return {instr.alu_half.type_b, GetRegister(instr.gpr20)};
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return Immediate(0);
|
||||
return {HalfType::F32, Immediate(0)};
|
||||
}
|
||||
}();
|
||||
op_b = UnpackHalfFloat(op_b, instr.alu_half.type_b);
|
||||
op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
|
||||
op_b = UnpackHalfFloat(op_b, type_b);
|
||||
// redeclaration to avoid a bug in clang with reusing local bindings in lambdas
|
||||
Node op_b_alt = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
|
||||
|
||||
Node value = [&]() {
|
||||
switch (opcode->get().GetId()) {
|
||||
case OpCode::Id::HADD2_C:
|
||||
case OpCode::Id::HADD2_R:
|
||||
return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
|
||||
return Operation(OperationCode::HAdd, PRECISE, op_a, op_b_alt);
|
||||
case OpCode::Id::HMUL2_C:
|
||||
case OpCode::Id::HMUL2_R:
|
||||
return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
|
||||
return Operation(OperationCode::HMul, PRECISE, op_a, op_b_alt);
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
|
||||
return Immediate(0);
|
||||
}
|
||||
}();
|
||||
value = GetSaturatedHalfFloat(value, instr.alu_half.saturate);
|
||||
value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);
|
||||
|
||||
SetRegister(bb, instr.gpr0, value);
|
||||
@@ -68,4 +70,4 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
|
||||
@@ -120,10 +120,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
|
||||
return Operation(OperationCode::FCeil, PRECISE, value);
|
||||
case Tegra::Shader::F2fRoundingOp::Trunc:
|
||||
return Operation(OperationCode::FTrunc, PRECISE, value);
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
|
||||
static_cast<u32>(instr.conversion.f2f.rounding.Value()));
|
||||
return Immediate(0);
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
|
||||
static_cast<u32>(instr.conversion.f2f.rounding.Value()));
|
||||
return Immediate(0);
|
||||
}();
|
||||
value = GetSaturatedFloat(value, instr.alu.saturate_d);
|
||||
|
||||
|
||||
@@ -34,15 +34,14 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
|
||||
case OpCode::Id::HFMA2_CR:
|
||||
neg_b = instr.hfma2.negate_b;
|
||||
neg_c = instr.hfma2.negate_c;
|
||||
return {instr.hfma2.saturate, instr.hfma2.type_b,
|
||||
return {instr.hfma2.saturate, HalfType::F32,
|
||||
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
|
||||
instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
|
||||
case OpCode::Id::HFMA2_RC:
|
||||
neg_b = instr.hfma2.negate_b;
|
||||
neg_c = instr.hfma2.negate_c;
|
||||
return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
|
||||
instr.hfma2.type_b,
|
||||
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
|
||||
HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
|
||||
case OpCode::Id::HFMA2_RR:
|
||||
neg_b = instr.hfma2.rr.negate_b;
|
||||
neg_c = instr.hfma2.rr.negate_c;
|
||||
@@ -56,13 +55,13 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
|
||||
return {false, identity, Immediate(0), identity, Immediate(0)};
|
||||
}
|
||||
}();
|
||||
UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");
|
||||
|
||||
const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
|
||||
op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
|
||||
op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);
|
||||
|
||||
Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
|
||||
value = GetSaturatedHalfFloat(value, saturate);
|
||||
value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
|
||||
|
||||
SetRegister(bb, instr.gpr0, value);
|
||||
@@ -70,4 +69,4 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
|
||||
return pc;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
|
||||
@@ -540,8 +540,6 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
|
||||
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
|
||||
bool is_array, bool is_aoffi) {
|
||||
const std::size_t coord_count = GetCoordCount(texture_type);
|
||||
const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
|
||||
const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
|
||||
|
||||
// If enabled arrays index is always stored in the gpr8 field
|
||||
const u64 array_register = instr.gpr8.Value();
|
||||
|
||||
@@ -56,9 +56,10 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
|
||||
instr.xmad.mode,
|
||||
Immediate(static_cast<u32>(instr.xmad.imm20_16)),
|
||||
GetRegister(instr.gpr39)};
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
|
||||
return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
|
||||
}
|
||||
UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
|
||||
return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
|
||||
}();
|
||||
|
||||
op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16);
|
||||
|
||||
@@ -439,11 +439,14 @@ Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
|
||||
return OperationCode::LogicalUGreaterEqual;
|
||||
case OperationCode::INegate:
|
||||
UNREACHABLE_MSG("Can't negate an unsigned integer");
|
||||
return {};
|
||||
case OperationCode::IAbsolute:
|
||||
UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
|
||||
return {};
|
||||
default:
|
||||
UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
|
||||
return {};
|
||||
}
|
||||
UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
|
||||
return {};
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
} // namespace VideoCommon::Shader
|
||||
|
||||
@@ -178,39 +178,44 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
|
||||
return PixelFormat::ABGR8S;
|
||||
case Tegra::Texture::ComponentType::UINT:
|
||||
return PixelFormat::ABGR8UI;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
|
||||
UNREACHABLE();
|
||||
break;
|
||||
case Tegra::Texture::TextureFormat::B5G6R5:
|
||||
switch (component_type) {
|
||||
case Tegra::Texture::ComponentType::UNORM:
|
||||
return PixelFormat::B5G6R5U;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
|
||||
UNREACHABLE();
|
||||
break;
|
||||
case Tegra::Texture::TextureFormat::A2B10G10R10:
|
||||
switch (component_type) {
|
||||
case Tegra::Texture::ComponentType::UNORM:
|
||||
return PixelFormat::A2B10G10R10U;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
|
||||
UNREACHABLE();
|
||||
break;
|
||||
case Tegra::Texture::TextureFormat::A1B5G5R5:
|
||||
switch (component_type) {
|
||||
case Tegra::Texture::ComponentType::UNORM:
|
||||
return PixelFormat::A1B5G5R5U;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
|
||||
UNREACHABLE();
|
||||
break;
|
||||
case Tegra::Texture::TextureFormat::R8:
|
||||
switch (component_type) {
|
||||
case Tegra::Texture::ComponentType::UNORM:
|
||||
return PixelFormat::R8U;
|
||||
case Tegra::Texture::ComponentType::UINT:
|
||||
return PixelFormat::R8UI;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
|
||||
UNREACHABLE();
|
||||
break;
|
||||
case Tegra::Texture::TextureFormat::G8R8:
|
||||
// TextureFormat::G8R8 is actually ordered red then green, as such we can use
|
||||
// PixelFormat::RG8U and PixelFormat::RG8S. This was tested with The Legend of Zelda: Breath
|
||||
@@ -220,50 +225,55 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
|
||||
return PixelFormat::RG8U;
|
||||
case Tegra::Texture::ComponentType::SNORM:
|
||||
return PixelFormat::RG8S;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
|
||||
UNREACHABLE();
|
||||
break;
|
||||
case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
|
||||
switch (component_type) {
|
||||
case Tegra::Texture::ComponentType::UNORM:
|
||||
return PixelFormat::RGBA16U;
|
||||
case Tegra::Texture::ComponentType::FLOAT:
|
||||
return PixelFormat::RGBA16F;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
|
||||
UNREACHABLE();
|
||||
break;
|
||||
case Tegra::Texture::TextureFormat::BF10GF11RF11:
|
||||
switch (component_type) {
|
||||
case Tegra::Texture::ComponentType::FLOAT:
|
||||
return PixelFormat::R11FG11FB10F;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
|
||||
UNREACHABLE();
|
||||
case Tegra::Texture::TextureFormat::R32_G32_B32_A32:
|
||||
switch (component_type) {
|
||||
case Tegra::Texture::ComponentType::FLOAT:
|
||||
return PixelFormat::RGBA32F;
|
||||
case Tegra::Texture::ComponentType::UINT:
|
||||
return PixelFormat::RGBA32UI;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
|
||||
UNREACHABLE();
|
||||
break;
|
||||
case Tegra::Texture::TextureFormat::R32_G32:
|
||||
switch (component_type) {
|
||||
case Tegra::Texture::ComponentType::FLOAT:
|
||||
return PixelFormat::RG32F;
|
||||
case Tegra::Texture::ComponentType::UINT:
|
||||
return PixelFormat::RG32UI;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
|
||||
UNREACHABLE();
|
||||
break;
|
||||
case Tegra::Texture::TextureFormat::R32_G32_B32:
|
||||
switch (component_type) {
|
||||
case Tegra::Texture::ComponentType::FLOAT:
|
||||
return PixelFormat::RGB32F;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
|
||||
UNREACHABLE();
|
||||
break;
|
||||
case Tegra::Texture::TextureFormat::R16:
|
||||
switch (component_type) {
|
||||
case Tegra::Texture::ComponentType::FLOAT:
|
||||
@@ -276,18 +286,20 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
|
||||
return PixelFormat::R16UI;
|
||||
case Tegra::Texture::ComponentType::SINT:
|
||||
return PixelFormat::R16I;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
|
||||
UNREACHABLE();
|
||||
break;
|
||||
case Tegra::Texture::TextureFormat::R32:
|
||||
switch (component_type) {
|
||||
case Tegra::Texture::ComponentType::FLOAT:
|
||||
return PixelFormat::R32F;
|
||||
case Tegra::Texture::ComponentType::UINT:
|
||||
return PixelFormat::R32UI;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
|
||||
UNREACHABLE();
|
||||
break;
|
||||
case Tegra::Texture::TextureFormat::ZF32:
|
||||
return PixelFormat::Z32F;
|
||||
case Tegra::Texture::TextureFormat::Z16:
|
||||
@@ -310,9 +322,10 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
|
||||
return PixelFormat::DXN2UNORM;
|
||||
case Tegra::Texture::ComponentType::SNORM:
|
||||
return PixelFormat::DXN2SNORM;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
|
||||
UNREACHABLE();
|
||||
break;
|
||||
case Tegra::Texture::TextureFormat::BC7U:
|
||||
return is_srgb ? PixelFormat::BC7U_SRGB : PixelFormat::BC7U;
|
||||
case Tegra::Texture::TextureFormat::BC6H_UF16:
|
||||
@@ -343,15 +356,17 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
|
||||
return PixelFormat::RG16UI;
|
||||
case Tegra::Texture::ComponentType::SINT:
|
||||
return PixelFormat::RG16I;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
|
||||
UNREACHABLE();
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}", static_cast<u32>(format),
|
||||
static_cast<u32>(component_type));
|
||||
UNREACHABLE();
|
||||
return PixelFormat::ABGR8U;
|
||||
break;
|
||||
}
|
||||
LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}", static_cast<u32>(format),
|
||||
static_cast<u32>(component_type));
|
||||
UNREACHABLE();
|
||||
return PixelFormat::ABGR8U;
|
||||
}
|
||||
|
||||
ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {
|
||||
@@ -513,8 +528,9 @@ bool IsFormatBCn(PixelFormat format) {
|
||||
case PixelFormat::DXT45_SRGB:
|
||||
case PixelFormat::BC7U_SRGB:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace VideoCore::Surface
|
||||
|
||||
@@ -25,8 +25,8 @@
|
||||
|
||||
class InputBitStream {
|
||||
public:
|
||||
explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0)
|
||||
: m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
|
||||
explicit InputBitStream(const unsigned char* ptr, int start_offset = 0)
|
||||
: m_CurByte(ptr), m_NextBit(start_offset % 8) {}
|
||||
|
||||
~InputBitStream() = default;
|
||||
|
||||
@@ -55,12 +55,9 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
const int m_NumBits;
|
||||
const unsigned char* m_CurByte;
|
||||
int m_NextBit = 0;
|
||||
int m_BitsRead = 0;
|
||||
|
||||
bool done = false;
|
||||
};
|
||||
|
||||
class OutputBitStream {
|
||||
@@ -114,7 +111,6 @@ private:
|
||||
const int m_NumBits;
|
||||
unsigned char* m_CurByte;
|
||||
int m_NextBit = 0;
|
||||
int m_BitsRead = 0;
|
||||
|
||||
bool done = false;
|
||||
};
|
||||
@@ -1616,6 +1612,7 @@ namespace Tegra::Texture::ASTC {
|
||||
std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
|
||||
uint32_t depth, uint32_t block_width, uint32_t block_height) {
|
||||
uint32_t blockIdx = 0;
|
||||
std::size_t depth_offset = 0;
|
||||
std::vector<uint8_t> outData(height * width * depth * 4);
|
||||
for (uint32_t k = 0; k < depth; k++) {
|
||||
for (uint32_t j = 0; j < height; j += block_height) {
|
||||
@@ -1630,7 +1627,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he
|
||||
uint32_t decompWidth = std::min(block_width, width - i);
|
||||
uint32_t decompHeight = std::min(block_height, height - j);
|
||||
|
||||
uint8_t* outRow = outData.data() + (j * width + i) * 4;
|
||||
uint8_t* outRow = depth_offset + outData.data() + (j * width + i) * 4;
|
||||
for (uint32_t jj = 0; jj < decompHeight; jj++) {
|
||||
memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
|
||||
}
|
||||
@@ -1638,6 +1635,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he
|
||||
blockIdx++;
|
||||
}
|
||||
}
|
||||
depth_offset += height * width * 4;
|
||||
}
|
||||
|
||||
return outData;
|
||||
|
||||
@@ -58,7 +58,7 @@ void CompatDB::Submit() {
|
||||
|
||||
button(NextButton)->setEnabled(false);
|
||||
button(NextButton)->setText(tr("Submitting"));
|
||||
button(QWizard::CancelButton)->setVisible(false);
|
||||
button(CancelButton)->setVisible(false);
|
||||
|
||||
testcase_watcher.setFuture(QtConcurrent::run(
|
||||
[] { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); }));
|
||||
@@ -74,12 +74,12 @@ void CompatDB::OnTestcaseSubmitted() {
|
||||
tr("An error occured while sending the Testcase"));
|
||||
button(NextButton)->setEnabled(true);
|
||||
button(NextButton)->setText(tr("Next"));
|
||||
button(QWizard::CancelButton)->setVisible(true);
|
||||
button(CancelButton)->setVisible(true);
|
||||
} else {
|
||||
next();
|
||||
// older versions of QT don't support the "NoCancelButtonOnLastPage" option, this is a
|
||||
// workaround
|
||||
button(QWizard::CancelButton)->setVisible(false);
|
||||
button(CancelButton)->setVisible(false);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -17,8 +17,12 @@ ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry)
|
||||
ui->hotkeysTab->Populate(registry);
|
||||
this->setConfiguration();
|
||||
this->PopulateSelectionList();
|
||||
|
||||
setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint);
|
||||
|
||||
connect(ui->selectorList, &QListWidget::itemSelectionChanged, this,
|
||||
&ConfigureDialog::UpdateVisibleTabs);
|
||||
|
||||
adjustSize();
|
||||
ui->selectorList->setCurrentRow(0);
|
||||
|
||||
|
||||
@@ -69,16 +69,20 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent)
|
||||
ConfigureGraphics::~ConfigureGraphics() = default;
|
||||
|
||||
void ConfigureGraphics::setConfiguration() {
|
||||
const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
|
||||
|
||||
ui->resolution_factor_combobox->setCurrentIndex(
|
||||
static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
|
||||
ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit);
|
||||
ui->frame_limit->setValue(Settings::values.frame_limit);
|
||||
ui->use_compatibility_profile->setEnabled(runtime_lock);
|
||||
ui->use_compatibility_profile->setChecked(Settings::values.use_compatibility_profile);
|
||||
ui->use_disk_shader_cache->setEnabled(runtime_lock);
|
||||
ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
|
||||
ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
|
||||
ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn());
|
||||
ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock);
|
||||
ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation);
|
||||
ui->force_30fps_mode->setEnabled(!Core::System::GetInstance().IsPoweredOn());
|
||||
ui->force_30fps_mode->setEnabled(runtime_lock);
|
||||
ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode);
|
||||
UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
|
||||
Settings::values.bg_blue));
|
||||
|
||||
@@ -67,8 +67,6 @@ public:
|
||||
|
||||
private:
|
||||
struct Hotkey {
|
||||
Hotkey() : shortcut(nullptr), context(Qt::WindowShortcut) {}
|
||||
|
||||
QKeySequence keyseq;
|
||||
QShortcut* shortcut = nullptr;
|
||||
Qt::ShortcutContext context = Qt::WindowShortcut;
|
||||
|
||||
@@ -176,9 +176,13 @@ EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {
|
||||
|
||||
SDL_SetMainReady();
|
||||
|
||||
const SDL_GLprofile profile = Settings::values.use_compatibility_profile
|
||||
? SDL_GL_CONTEXT_PROFILE_COMPATIBILITY
|
||||
: SDL_GL_CONTEXT_PROFILE_CORE;
|
||||
|
||||
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
|
||||
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3);
|
||||
SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
|
||||
SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, profile);
|
||||
SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
|
||||
SDL_GL_SetAttribute(SDL_GL_RED_SIZE, 8);
|
||||
SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE, 8);
|
||||
|
||||
@@ -222,6 +222,7 @@ int main(int argc, char** argv) {
|
||||
|
||||
system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL");
|
||||
|
||||
emu_window->MakeCurrent();
|
||||
system.Renderer().Rasterizer().LoadDiskResources();
|
||||
|
||||
while (emu_window->IsOpen()) {
|
||||
|
||||
Reference in New Issue
Block a user