Compare commits

..

85 Commits

Author SHA1 Message Date
ReinUsesLisp
a652e58c54 gl_rasterizer: Pass the right number of array quad vertices count 2019-05-17 17:08:34 -03:00
bunnei
6f1720a5b7 Merge pull request #2477 from ReinUsesLisp/fix-sdl2
yuzu_cmd: Make OpenGL's context current
2019-05-17 13:04:33 -04:00
bunnei
865025f612 Merge pull request #2478 from ReinUsesLisp/sdl2-compat
yuzu_cmd: Use OpenGL compat when asked in the settings
2019-05-17 13:04:04 -04:00
bunnei
1975d32f2d Merge pull request #2479 from ReinUsesLisp/qt-shadow
qt/configure_graphics: Shadow options at runtime
2019-05-17 13:01:41 -04:00
ReinUsesLisp
4cf64f8e09 qt/configure_graphics: Shadow options at runtime
Compatibility profile and the disk shader cache settings shouldn't
be changed at runtime. This aims to address that shadowing those
options.
2019-05-17 04:29:20 -03:00
ReinUsesLisp
69265e4504 yuzu_cmd: Use OpenGL compat when asked in the settings 2019-05-17 04:25:26 -03:00
ReinUsesLisp
5f877d9458 yuzu_cmd: Make OpenGL's context current
The SDL2 frontend never bound the OpenGL context, resulting on a white
screen and no-ops all over the backend.
2019-05-17 04:13:20 -03:00
Mat M
c4d549919f Merge pull request #2462 from lioncash/video-mm
video_core/memory_manager: Minor tidying
2019-05-14 06:40:33 -04:00
Mat M
dadcf317dc Merge pull request #2461 from lioncash/unused-var
video_core: Remove a few unused variables and functions
2019-05-14 06:36:26 -04:00
Mat M
8b933e77cd Merge pull request #2460 from lioncash/volatile
CMakeLists: Specify /volatile:iso for MSVC
2019-05-14 06:34:53 -04:00
Mat M
3e8e335a5c Merge pull request #2450 from lioncash/warn-level
CMakeLists: Explicitly specify -Wall for the non-MSVC case
2019-05-14 06:34:05 -04:00
Rodrigo Locatti
940a71089d Merge pull request #2413 from FernandoS27/opt-gpu
Rasterizer Cache: refactor flushing & optimize memory usage of surfaces
2019-05-13 23:01:59 -03:00
Lioncash
716fbaef74 video_core/memory_manager: Mark IsBlockContinuous() as a const member function
Corrects the typo in its name and marks the function as a const member
function, given it doesn't actually modify memory manager state.
2019-05-09 19:14:36 -04:00
Lioncash
d4bcd006b2 video_core/memory_manager: Mark the constructor as explicit
Prevents implicit converting constructions of the memory manager.
2019-05-09 19:10:26 -04:00
Lioncash
fd12788967 video_core/memory_manager: Default the destructor within the cpp file
Makes the class less surprising when it comes to forward declaring the
type, and also prevents inlining the destruction code of the class,
given it contains non-trivial types.
2019-05-09 19:10:13 -04:00
Lioncash
53afe47cec video_core/memory_manager: Amend doxygen comments
Corrects references to non-existent parameters and corrects typos.
2019-05-09 19:09:19 -04:00
Lioncash
5235b053b4 video_core/memory_manager: Remove superfluous const from function declarations
These are able to be omitted from the declaration of functions, since
they don't do anything at the type system level. The definitions of the
functions can retain the use of const though, since they make the
variables immutable in the implementation of the function where they're
used.
2019-05-09 18:59:49 -04:00
Lioncash
b6408e9671 video_core/renderer_opengl/gl_shader_cache: Correct member initialization order
Silences a -Wreorder warning.
2019-05-09 18:55:47 -04:00
Lioncash
e43ba3acd4 video_core/shader/decode/texture: Remove unused variable from GetTld4Code() 2019-05-09 18:49:56 -04:00
Lioncash
e3c45b4338 renderer_vulkan/vk_shader_decompiler: Remove unused variable from DeclareInternalFlags() 2019-05-09 18:47:48 -04:00
Lioncash
175fe8aaeb video_core/renderer_opengl/gl_shader_decompiler: Remove unused Composite() function
This isn't used at all, so it can be removed.
2019-05-09 18:45:26 -04:00
Lioncash
6d28d288a3 video_core/renderer_opengl/gl_rasterizer_cache: Remove unused variable in UploadGLMipmapTexture()
This variable is unused entirely, so it can be removed.
2019-05-09 18:42:48 -04:00
Lioncash
ba165b1092 video_core/gpu_thread: Remove unused local variable
Instead of retrieving the data from the std::variant instance, we can
just check if the variant contains that type of data.

This is essentially the same behavior, only it returns a bool indicating
whether or not the type in the variant is currently active, instead of
actually retrieving the data.
2019-05-09 18:39:21 -04:00
Lioncash
c56d893e77 video_core/textures/astc: Remove unused variables
Silences a few compilation warnings.
2019-05-09 18:33:36 -04:00
Lioncash
c4d03f0154 CMakeLists: Specify /volatile:iso for MSVC
By default, MSVC doesn't use standards-compliant volatile semantics.
This makes it behave in a standards-compliant manner, making
expectations more uniform across compilers.
2019-05-09 15:49:30 -04:00
bunnei
7cb17834c7 Merge pull request #2437 from lioncash/audctl
service/audctl: Update documentation comments to be relative to 8.0.0
2019-05-09 13:24:13 -04:00
bunnei
f3317cf2db Merge pull request #2454 from lioncash/cflag
src/CMakeLists: Add /Zc:externConstexpr to the MSVC build flags
2019-05-09 13:23:49 -04:00
bunnei
daca045fcd Merge pull request #2442 from FernandoS27/astc-fix
Fix Layered ASTC Textures
2019-05-09 13:23:14 -04:00
bunnei
f69d3a6351 Merge pull request #2443 from ReinUsesLisp/skip-repeated-variants
gl_shader_disk_cache: Skip stored shader variants instead of asserting
2019-05-09 13:22:42 -04:00
bunnei
5907619a04 Merge pull request #2445 from FearlessTobi/port-4749
Port citra-emu/citra#4749: "web_service: Misc fixes"
2019-05-09 13:22:00 -04:00
bunnei
9567b3a293 Merge pull request #2458 from lioncash/hotkey
yuzu/hotkeys: Remove unnecessary constructor
2019-05-09 13:21:36 -04:00
bunnei
c6f3831320 Merge pull request #2456 from lioncash/qualifier
yuzu/compatdb: Remove unnecessary qualifiers
2019-05-09 13:21:04 -04:00
bunnei
8abf0add04 Merge pull request #2459 from lioncash/what
configure_dialog: Remove the Whats This? button from the dialog
2019-05-09 13:20:12 -04:00
bunnei
5b6571c170 Merge pull request #2453 from lioncash/enum
core/memory: Remove unused FlushMode enum
2019-05-09 13:19:49 -04:00
bunnei
c27b81cb85 Merge pull request #2429 from FernandoS27/compute
Corrections and Implementation on GPU Engines
2019-05-09 13:19:22 -04:00
bunnei
0e9a17b029 Merge pull request #2440 from lioncash/dynarmic
externals: Update dynarmic to master
2019-05-09 13:18:49 -04:00
Lioncash
f3c18d622e configure_dialog: Remove the Whats This? button from the dialog 2019-05-09 03:20:13 -04:00
Lioncash
8bdef4f951 yuzu/hotkeys: Remove unnecessary constructor
The behavior of the Hotkey constructor is already accomplished via in-class member
initializers, so the constructor is superfluous here.
2019-05-09 02:17:22 -04:00
Lioncash
a97120efc1 yuzu/compatdb: Remove unnecessary qualifiers
Keeps the code consistent in regards to how the buttons are referred to.
2019-05-09 01:08:06 -04:00
Lioncash
70c6506a7e src/CMakeLists: Add /Zc:externConstexpr to the MSVC build flags
The C++ standard allows constexpr variables declared with the extern
keyword to have external linkage. Previously MSVC wasn't abiding by
this. This just makes the compiler more standards compliant during
builds.

Given we currently don't make use of anything that would break by this,
this is safe to enable.
2019-05-07 14:06:22 -04:00
Lioncash
6ca7241bd9 src/CMakeLists: Vertically order compilation flags
Makes it much nicer to visually scan the options. This also starts the
flag descriptions from the same column for the same reason.
2019-05-07 14:05:48 -04:00
Lioncash
495a8d8d95 core/memory: Remove unused FlushMode enum
Recent changes to memory-related code resulted in this being unused, so
we can remove it.
2019-05-07 13:55:17 -04:00
Lioncash
0964444529 externals: Update dynarmic to master
Better instruction support has been added since the last update.
2019-05-07 10:39:25 -04:00
Merry
c63e68c480 Merge pull request #2451 from lioncash/travis
travis: Update to using Xcode 10.2
2019-05-07 15:36:13 +01:00
Lioncash
4aefd45193 travis: Update to using Xcode 10.2
Keeps the CI toolchain updated. This is also necessary for updating
dynarmic.
2019-05-07 06:40:30 -04:00
Rodrigo Locatti
6743982d28 Merge pull request #2447 from lioncash/dtor
core/frontend/emu_window: Make GraphicsContext's destructor virtual
2019-05-07 05:54:04 -03:00
Rodrigo Locatti
57db3f6763 Merge pull request #2448 from lioncash/pragma
common/zstd_compression: Remove #pragma once directive from source file
2019-05-07 05:51:37 -03:00
Rodrigo Locatti
a206418846 Merge pull request #2449 from lioncash/unused-var
gl_rasterizer: Silence unused variable warnings
2019-05-07 05:50:59 -03:00
zhupengfei
10c4f23953 core/telemetry_session: Only create the backend when we really need it
The backend is not used until we decide to submit the testcase/telemetry, and creating it early prevents users from updating the credentials properly while the games are running.
2019-05-04 19:45:48 +02:00
Lioncash
9e15193ef8 shader/decode/texture: Remove unused variable
This isn't used anywhere, so we can get rid of it.
2019-05-04 02:10:38 -04:00
Lioncash
5d0dca73c6 CMakeLists: Explicitly specify -Wall for the non-MSVC case
Ensures that -Wall is always active as a compilation flag.
2019-05-04 02:06:56 -04:00
Lioncash
08b270676b gl_rasterizer: Silence unused variable warning
Makes use of src, so it's not considered unused.
2019-05-04 02:00:17 -04:00
Lioncash
a6f7a44aab common/zstd_compression: Remove #pragma once directive from source file
Introduced in 72477731ed. This is only
necessary within header files.
2019-05-04 01:54:29 -04:00
Lioncash
1230a0e7ce core/frontend/emu_window: Make GraphicsContext's destructor virtual
This class is used in a polymorphic context, so destruction of the
context will lead to undefined behavior if the destructor isn't virtual.
2019-05-04 01:47:38 -04:00
bunnei
1f72bb733f Merge pull request #2408 from FearlessTobi/port-4215
Port citra-emu/citra#4215: "travis: Use Ninja for Travis builds"
2019-05-02 20:59:09 -04:00
Fernando Sahmkow
e64c41efe8 Refactors and name corrections. 2019-05-01 15:31:39 -04:00
ReinUsesLisp
4aa081b4e7 gl_shader_disk_cache: Skip stored shader variants instead of asserting
Instead of asserting on already stored shader variants, silently skip them.
This shouldn't be happening but when a shader is invalidated and it is
not stored in the shader cache, this assert would hit and save that
shader anyways when the asserts are disabled.
2019-05-01 00:36:11 -03:00
Fernando Sahmkow
95261639fb Fix Layered ASTC Textures
By adding the missing layer offset in ASTC compression.
2019-04-30 23:02:31 -04:00
Lioncash
75a8b304d4 loader/nso: Remove left-in debug pragma
Unintentionally introduced in 552d5071fa
2019-04-30 22:55:53 -04:00
bunnei
fb420358a9 Merge pull request #2406 from FearlessTobi/port-3839
Port citra-emu/citra#3839: "travis: use prebuilt image"
2019-04-30 19:25:53 -04:00
bunnei
79e54abe19 Merge pull request #2100 from FreddyFunk/disk-cache-precompiled-file
gl_shader_disk_cache: Improve precompiled shader cache generation speed and size
2019-04-30 19:24:01 -04:00
bunnei
91e239d66f Merge pull request #2435 from ReinUsesLisp/misc-vc
shader_ir: Miscellaneous fixes
2019-04-28 22:29:43 -04:00
bunnei
2be32eb3d2 Merge pull request #2412 from lioncash/system
kernel/vm_manager: Remove usages of global system accessors
2019-04-28 22:27:14 -04:00
bunnei
c52233ec8b Merge pull request #2322 from ReinUsesLisp/wswitch
video_core: Silent -Wswitch warnings
2019-04-28 22:24:58 -04:00
bunnei
9a3737120d Merge pull request #2423 from FernandoS27/half-correct
Corrections on Half Float operations: HADD2 HMUL2 and HFMA2
2019-04-28 22:24:22 -04:00
Lioncash
565fce71b1 service/audctl: Update documentation comments to be relative to 8.0.0
The state of these service calls are still the same in version 8.0.0.
2019-04-27 23:17:58 -04:00
FreddyFunk
1a3ff252a4 Re added new lines at the end of files 2019-04-23 23:19:28 +02:00
unknown
3091b40691 gl_shader_disk_cache: Compress precompiled shader cache file with Zstandard 2019-04-23 22:24:31 +02:00
unknown
9db2c734c9 gl_shader_disk_cache: Use VectorVfsFile for the virtual precompiled shader cache file 2019-04-23 22:24:23 +02:00
unknown
3fe542cf60 gl_shader_disk_cache: Remove per shader compression 2019-04-23 21:40:01 +02:00
Fernando Sahmkow
b3118ee316 Fixes and Corrections to DMA Engine 2019-04-23 15:28:18 -04:00
Fernando Sahmkow
f1e5314f1a Add Swizzle Parameters to the DMA engine 2019-04-23 11:21:00 -04:00
Fernando Sahmkow
e140e2ebc6 Add Documentation Headers to all the GPU Engines 2019-04-23 08:44:52 -04:00
Fernando Sahmkow
021d28c9b8 Corrections and styling 2019-04-23 08:02:24 -04:00
Fernando Sahmkow
701ce1c9d0 Implement Maxwell3D Data Upload 2019-04-22 19:27:36 -04:00
Fernando Sahmkow
e4ff140b99 Introduce skeleton of the GPU Compute Engine. 2019-04-22 19:05:43 -04:00
Fernando Sahmkow
a91d3fc639 Revamp Kepler Memory to use a subegine to manage uploads 2019-04-22 18:50:56 -04:00
Fernando Sahmkow
4c36b78567 Rasterizer Cache: Use a temporal storage for Surfaces loading/flushing.
This PR should heavily reduce memory usage since temporal buffers are no
longer stored per Surface but instead managed by the Rasterizer Cache.
2019-04-21 11:42:07 -04:00
Fernando Sahmkow
623b2e4b8f Corrections Half Float operations on const buffers and implement saturation. 2019-04-20 21:11:33 -04:00
Fernando Sahmkow
a3eb91ed8c RasterizerCache Redesign: Flush
flushing is now responsability of children caches instead of the cache 
object. This change will allow the specific cache to pass extra 
parameters on flushing and will allow more flexibility.
2019-04-19 20:44:56 -04:00
ReinUsesLisp
fbe8d1ceaa video_core: Silent -Wswitch warnings 2019-04-18 15:54:39 -03:00
Lioncash
b6a87b422e kernel/vm_manager: Remove usages of global system accessors
Makes the dependency on the system instance explicit within VMManager's
interface.
2019-04-16 20:02:50 -04:00
Cameron Cawley
1f3cc036da travis: Use Ninja for Travis builds 2019-04-16 01:06:34 +02:00
fearlessTobi
b67be7154d GenerateSCMRev: fix Travis compilation on repo forks 2019-04-16 00:34:22 +02:00
liushuyu
a9f58593d4 travis: use prebuilt image (#3839)
* travis: use prebuilt image

* travis: use prebuilt image (MinGW)
2019-04-15 22:22:09 +02:00
70 changed files with 952 additions and 523 deletions

View File

@@ -24,7 +24,7 @@ matrix:
- os: osx
env: NAME="macos build"
sudo: false
osx_image: xcode10.1
osx_image: xcode10.2
install: "./.travis/macos/deps.sh"
script: "./.travis/macos/build.sh"
after_success: "./.travis/macos/upload.sh"

View File

@@ -1,3 +1,3 @@
#!/bin/bash -ex
mkdir "$HOME/.ccache" || true
docker run --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache ubuntu:18.04 /bin/bash -ex /yuzu/.travis/linux-mingw/docker.sh
docker run --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache yuzuemu/build-environments:linux-mingw /bin/bash -ex /yuzu/.travis/linux-mingw/docker.sh

View File

@@ -1,3 +1,3 @@
#!/bin/sh -ex
docker pull ubuntu:18.04
docker pull yuzuemu/build-environments:linux-mingw

View File

@@ -1,16 +1,6 @@
#!/bin/bash -ex
cd /yuzu
MINGW_PACKAGES="sdl2-mingw-w64 qt5base-mingw-w64 qt5tools-mingw-w64 libsamplerate-mingw-w64 qt5multimedia-mingw-w64"
apt-get update
apt-get install -y gpg wget git python3-pip python ccache g++-mingw-w64-x86-64 gcc-mingw-w64-x86-64 mingw-w64-tools cmake
echo 'deb http://ppa.launchpad.net/tobydox/mingw-w64/ubuntu bionic main ' > /etc/apt/sources.list.d/extras.list
apt-key adv --keyserver keyserver.ubuntu.com --recv '72931B477E22FEFD47F8DECE02FE5F12ADDE29B2'
apt-get update
apt-get install -y ${MINGW_PACKAGES}
# fix a problem in current MinGW headers
wget -q https://raw.githubusercontent.com/Alexpux/mingw-w64/d0d7f784833bbb0b2d279310ddc6afb52fe47a46/mingw-w64-headers/crt/errno.h -O /usr/x86_64-w64-mingw32/include/errno.h
# override Travis CI unreasonable ccache size
echo 'max_size = 3.0G' > "$HOME/.ccache/ccache.conf"
@@ -23,8 +13,8 @@ echo '' >> /bin/cmd
chmod +x /bin/cmd
mkdir build && cd build
cmake .. -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release
make -j4
cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release
ninja
# Clean up the dirty hacks
rm /bin/uname && mv /bin/uname1 /bin/uname

View File

@@ -1,4 +1,4 @@
#!/bin/bash -ex
mkdir -p "$HOME/.ccache"
docker run -e ENABLE_COMPATIBILITY_REPORTING --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache ubuntu:18.04 /bin/bash /yuzu/.travis/linux/docker.sh
docker run -e ENABLE_COMPATIBILITY_REPORTING --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache yuzuemu/build-environments:linux-fresh /bin/bash /yuzu/.travis/linux/docker.sh

View File

@@ -1,3 +1,3 @@
#!/bin/sh -ex
docker pull ubuntu:18.04
docker pull yuzuemu/build-environments:linux-fresh

View File

@@ -1,12 +1,9 @@
#!/bin/bash -ex
apt-get update
apt-get install --no-install-recommends -y build-essential git libqt5opengl5-dev libsdl2-dev libssl-dev python qtbase5-dev qtwebengine5-dev wget cmake ninja-build ccache
cd /yuzu
mkdir build && cd build
cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON -G Ninja
cmake .. -G Ninja -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON
ninja
ccache -s

View File

@@ -7,6 +7,7 @@ export Qt5_DIR=$(brew --prefix)/opt/qt5
export UNICORNDIR=$(pwd)/externals/unicorn
export PATH="/usr/local/opt/ccache/libexec:$PATH"
# TODO: Build using ninja instead of make
mkdir build && cd build
cmake --version
cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DUSE_DISCORD_PRESENCE=ON

View File

@@ -19,7 +19,7 @@ set(BUILD_VERSION "0")
if (BUILD_REPOSITORY)
# regex capture the string nightly or canary into CMAKE_MATCH_1
string(REGEX MATCH "yuzu-emu/yuzu-?(.*)" OUTVAR ${BUILD_REPOSITORY})
if (${CMAKE_MATCH_COUNT} GREATER 0)
if ("${CMAKE_MATCH_COUNT}" GREATER 0)
# capitalize the first letter of each word in the repo name.
string(REPLACE "-" ";" REPO_NAME_LIST ${CMAKE_MATCH_1})
foreach(WORD ${REPO_NAME_LIST})

View File

@@ -7,6 +7,10 @@ include(DownloadExternals)
add_library(catch-single-include INTERFACE)
target_include_directories(catch-single-include INTERFACE catch/single_include)
# libfmt
add_subdirectory(fmt)
add_library(fmt::fmt ALIAS fmt)
# Dynarmic
if (ARCHITECTURE_x86_64)
set(DYNARMIC_TESTS OFF)
@@ -14,10 +18,6 @@ if (ARCHITECTURE_x86_64)
add_subdirectory(dynarmic)
endif()
# libfmt
add_subdirectory(fmt)
add_library(fmt::fmt ALIAS fmt)
# getopt
if (MSVC)
add_subdirectory(getopt)

View File

@@ -21,15 +21,29 @@ if (MSVC)
# Ensure that projects build with Unicode support.
add_definitions(-DUNICODE -D_UNICODE)
# /W3 - Level 3 warnings
# /MP - Multi-threaded compilation
# /Zi - Output debugging information
# /Zo - enhanced debug info for optimized builds
# /permissive- - enables stricter C++ standards conformance checks
# /EHsc - C++-only exception handling semantics
# /Zc:throwingNew - let codegen assume `operator new` will never return null
# /Zc:inline - let codegen omit inline functions in object files
add_compile_options(/W3 /MP /Zi /Zo /permissive- /EHsc /std:c++latest /Zc:throwingNew,inline)
# /W3 - Level 3 warnings
# /MP - Multi-threaded compilation
# /Zi - Output debugging information
# /Zo - Enhanced debug info for optimized builds
# /permissive- - Enables stricter C++ standards conformance checks
# /EHsc - C++-only exception handling semantics
# /volatile:iso - Use strict standards-compliant volatile semantics.
# /Zc:externConstexpr - Allow extern constexpr variables to have external linkage, like the standard mandates
# /Zc:inline - Let codegen omit inline functions in object files
# /Zc:throwingNew - Let codegen assume `operator new` (without std::nothrow) will never return null
add_compile_options(
/W3
/MP
/Zi
/Zo
/permissive-
/EHsc
/std:c++latest
/volatile:iso
/Zc:externConstexpr
/Zc:inline
/Zc:throwingNew
)
# /GS- - No stack buffer overflow checks
add_compile_options("$<$<CONFIG:Release>:/GS->")
@@ -37,7 +51,10 @@ if (MSVC)
set(CMAKE_EXE_LINKER_FLAGS_DEBUG "/DEBUG /MANIFEST:NO" CACHE STRING "" FORCE)
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE)
else()
add_compile_options("-Wno-attributes")
add_compile_options(
-Wall
-Wno-attributes
)
if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang)
add_compile_options("-stdlib=libc++")

View File

@@ -2,8 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <zstd.h>

View File

@@ -10,6 +10,8 @@
namespace Core::Frontend {
GraphicsContext::~GraphicsContext() = default;
class EmuWindow::TouchState : public Input::Factory<Input::TouchDevice>,
public std::enable_shared_from_this<TouchState> {
public:

View File

@@ -19,6 +19,8 @@ namespace Core::Frontend {
*/
class GraphicsContext {
public:
virtual ~GraphicsContext();
/// Makes the graphics context current for the caller thread
virtual void MakeCurrent() = 0;

View File

@@ -241,7 +241,8 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
}
Process::Process(Core::System& system)
: WaitObject{system.Kernel()}, address_arbiter{system}, mutex{system}, system{system} {}
: WaitObject{system.Kernel()}, vm_manager{system},
address_arbiter{system}, mutex{system}, system{system} {}
Process::~Process() = default;

View File

@@ -62,7 +62,7 @@ bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
return true;
}
VMManager::VMManager() {
VMManager::VMManager(Core::System& system) : system{system} {
// Default to assuming a 39-bit address space. This way we have a sane
// starting point with executables that don't provide metadata.
Reset(FileSys::ProgramAddressSpaceType::Is39Bit);
@@ -111,7 +111,6 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
VirtualMemoryArea& final_vma = vma_handle->second;
ASSERT(final_vma.size == size);
auto& system = Core::System::GetInstance();
system.ArmInterface(0).MapBackingMemory(target, size, block->data() + offset,
VMAPermission::ReadWriteExecute);
system.ArmInterface(1).MapBackingMemory(target, size, block->data() + offset,
@@ -140,7 +139,6 @@ ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* me
VirtualMemoryArea& final_vma = vma_handle->second;
ASSERT(final_vma.size == size);
auto& system = Core::System::GetInstance();
system.ArmInterface(0).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
system.ArmInterface(1).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
system.ArmInterface(2).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
@@ -223,7 +221,6 @@ ResultCode VMManager::UnmapRange(VAddr target, u64 size) {
ASSERT(FindVMA(target)->second.size >= size);
auto& system = Core::System::GetInstance();
system.ArmInterface(0).UnmapMemory(target, size);
system.ArmInterface(1).UnmapMemory(target, size);
system.ArmInterface(2).UnmapMemory(target, size);
@@ -376,7 +373,7 @@ ResultCode VMManager::UnmapCodeMemory(VAddr dst_address, VAddr src_address, u64
Reprotect(src_vma_iter, VMAPermission::ReadWrite);
if (dst_memory_state == MemoryState::ModuleCode) {
Core::System::GetInstance().InvalidateCpuInstructionCaches();
system.InvalidateCpuInstructionCaches();
}
return unmap_result;

View File

@@ -14,6 +14,10 @@
#include "core/hle/result.h"
#include "core/memory.h"
namespace Core {
class System;
}
namespace FileSys {
enum class ProgramAddressSpaceType : u8;
}
@@ -321,7 +325,7 @@ class VMManager final {
public:
using VMAHandle = VMAMap::const_iterator;
VMManager();
explicit VMManager(Core::System& system);
~VMManager();
/// Clears the address space map, re-initializing with a single free area.
@@ -712,5 +716,7 @@ private:
// The end of the currently allocated heap. This is not an inclusive
// end of the range. This is essentially 'base_address + current_size'.
VAddr heap_end = 0;
Core::System& system;
};
} // namespace Kernel

View File

@@ -50,7 +50,7 @@ void AudCtl::GetTargetVolumeMin(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Audio, "called.");
// This service function is currently hardcoded on the
// actual console to this value (as of 6.0.0).
// actual console to this value (as of 8.0.0).
constexpr s32 target_min_volume = 0;
IPC::ResponseBuilder rb{ctx, 3};
@@ -62,7 +62,7 @@ void AudCtl::GetTargetVolumeMax(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Audio, "called.");
// This service function is currently hardcoded on the
// actual console to this value (as of 6.0.0).
// actual console to this value (as of 8.0.0).
constexpr s32 target_max_volume = 15;
IPC::ResponseBuilder rb{ctx, 3};

View File

@@ -21,8 +21,6 @@
#include "core/memory.h"
#include "core/settings.h"
#pragma optimize("", off)
namespace Loader {
namespace {
struct MODHeader {

View File

@@ -72,15 +72,6 @@ u8* GetPointer(VAddr vaddr);
std::string ReadCString(VAddr vaddr, std::size_t max_length);
enum class FlushMode {
/// Write back modified surfaces to RAM
Flush,
/// Remove region from the cache
Invalidate,
/// Write back modified surfaces to RAM, and also remove them from the cache
FlushAndInvalidate,
};
/**
* Mark each page touching the region as cached.
*/

View File

@@ -102,12 +102,6 @@ bool VerifyLogin(const std::string& username, const std::string& token) {
}
TelemetrySession::TelemetrySession() {
#ifdef ENABLE_WEB_SERVICE
backend = std::make_unique<WebService::TelemetryJson>(
Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
#else
backend = std::make_unique<Telemetry::NullVisitor>();
#endif
// Log one-time top-level information
AddField(Telemetry::FieldType::None, "TelemetryId", GetTelemetryId());
@@ -175,9 +169,14 @@ TelemetrySession::~TelemetrySession() {
.count()};
AddField(Telemetry::FieldType::Session, "Shutdown_Time", shutdown_time);
#ifdef ENABLE_WEB_SERVICE
auto backend = std::make_unique<WebService::TelemetryJson>(
Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
#else
auto backend = std::make_unique<Telemetry::NullVisitor>();
#endif
// Complete the session, submitting to web service if necessary
// This is just a placeholder to wrap up the session once the core completes and this is
// destroyed. This will be moved elsewhere once we are actually doing real I/O with the service.
field_collection.Accept(*backend);
if (Settings::values.enable_telemetry)
backend->Complete();
@@ -186,6 +185,8 @@ TelemetrySession::~TelemetrySession() {
bool TelemetrySession::SubmitTestcase() {
#ifdef ENABLE_WEB_SERVICE
auto backend = std::make_unique<WebService::TelemetryJson>(
Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
field_collection.Accept(*backend);
return backend->SubmitTestcase();
#else

View File

@@ -39,7 +39,6 @@ public:
private:
Telemetry::FieldCollection field_collection; ///< Tracks all added fields for the session
std::unique_ptr<Telemetry::VisitorInterface> backend; ///< Backend interface that logs fields
};
/**

View File

@@ -3,6 +3,8 @@ add_library(video_core STATIC
dma_pusher.h
debug_utils/debug_utils.cpp
debug_utils/debug_utils.h
engines/engine_upload.cpp
engines/engine_upload.h
engines/fermi_2d.cpp
engines/fermi_2d.h
engines/kepler_compute.cpp

View File

@@ -105,6 +105,8 @@ bool DmaPusher::Step() {
dma_state.non_incrementing = false;
dma_increment_once = true;
break;
default:
break;
}
}
}

View File

@@ -0,0 +1,48 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/memory_manager.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Engines::Upload {
State::State(MemoryManager& memory_manager, Registers& regs)
: memory_manager(memory_manager), regs(regs) {}
void State::ProcessExec(const bool is_linear) {
write_offset = 0;
copy_size = regs.line_length_in * regs.line_count;
inner_buffer.resize(copy_size);
this->is_linear = is_linear;
}
void State::ProcessData(const u32 data, const bool is_last_call) {
const u32 sub_copy_size = std::min(4U, copy_size - write_offset);
std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size);
write_offset += sub_copy_size;
if (!is_last_call) {
return;
}
const GPUVAddr address{regs.dest.Address()};
if (is_linear) {
memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
} else {
UNIMPLEMENTED_IF(regs.dest.z != 0);
UNIMPLEMENTED_IF(regs.dest.depth != 1);
UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
const std::size_t dst_size = Tegra::Texture::CalculateSize(
true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
tmp_buffer.resize(dst_size);
memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y,
regs.dest.BlockHeight(), copy_size, inner_buffer.data(),
tmp_buffer.data());
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
}
}
} // namespace Tegra::Engines::Upload

View File

@@ -0,0 +1,75 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
namespace Tegra {
class MemoryManager;
}
namespace Tegra::Engines::Upload {
struct Registers {
u32 line_length_in;
u32 line_count;
struct {
u32 address_high;
u32 address_low;
u32 pitch;
union {
BitField<0, 4, u32> block_width;
BitField<4, 4, u32> block_height;
BitField<8, 4, u32> block_depth;
};
u32 width;
u32 height;
u32 depth;
u32 z;
u32 x;
u32 y;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
}
u32 BlockWidth() const {
return 1U << block_width.Value();
}
u32 BlockHeight() const {
return 1U << block_height.Value();
}
u32 BlockDepth() const {
return 1U << block_depth.Value();
}
} dest;
};
class State {
public:
State(MemoryManager& memory_manager, Registers& regs);
~State() = default;
void ProcessExec(const bool is_linear);
void ProcessData(const u32 data, const bool is_last_call);
private:
u32 write_offset = 0;
u32 copy_size = 0;
std::vector<u8> inner_buffer;
std::vector<u8> tmp_buffer;
bool is_linear = false;
Registers& regs;
MemoryManager& memory_manager;
};
} // namespace Tegra::Engines::Upload

View File

@@ -21,6 +21,12 @@ class RasterizerInterface;
namespace Tegra::Engines {
/**
* This Engine is known as G80_2D. Documentation can be found in:
* https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
*/
#define FERMI2D_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))

View File

@@ -4,12 +4,21 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Engines {
KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {}
KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{
memory_manager,
regs.upload} {}
KeplerCompute::~KeplerCompute() = default;
@@ -20,14 +29,34 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
regs.reg_array[method_call.method] = method_call.argument;
switch (method_call.method) {
case KEPLER_COMPUTE_REG_INDEX(exec_upload): {
upload_state.ProcessExec(regs.exec_upload.linear != 0);
break;
}
case KEPLER_COMPUTE_REG_INDEX(data_upload): {
const bool is_last_call = method_call.IsLastCall();
upload_state.ProcessData(method_call.argument, is_last_call);
if (is_last_call) {
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
}
break;
}
case KEPLER_COMPUTE_REG_INDEX(launch):
// Abort execution since compute shaders can be used to alter game memory (e.g. CUDA
// kernels)
UNREACHABLE_MSG("Compute shaders are not implemented");
ProcessLaunch();
break;
default:
break;
}
}
void KeplerCompute::ProcessLaunch() {
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start;
LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc);
}
} // namespace Tegra::Engines

View File

@@ -6,22 +6,40 @@
#include <array>
#include <cstddef>
#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h"
namespace Core {
class System;
}
namespace Tegra {
class MemoryManager;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace Tegra::Engines {
/**
* This Engine is known as GK104_Compute. Documentation can be found in:
* https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h
*/
#define KEPLER_COMPUTE_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
class KeplerCompute final {
public:
explicit KeplerCompute(MemoryManager& memory_manager);
explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager);
~KeplerCompute();
static constexpr std::size_t NumConstBuffers = 8;
@@ -31,30 +49,181 @@ public:
union {
struct {
INSERT_PADDING_WORDS(0xAF);
INSERT_PADDING_WORDS(0x60);
Upload::Registers upload;
struct {
union {
BitField<0, 1, u32> linear;
};
} exec_upload;
u32 data_upload;
INSERT_PADDING_WORDS(0x3F);
struct {
u32 address;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8));
}
} launch_desc_loc;
INSERT_PADDING_WORDS(0x1);
u32 launch;
INSERT_PADDING_WORDS(0xC48);
INSERT_PADDING_WORDS(0x4A7);
struct {
u32 address_high;
u32 address_low;
u32 limit;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} tsc;
INSERT_PADDING_WORDS(0x3);
struct {
u32 address_high;
u32 address_low;
u32 limit;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} tic;
INSERT_PADDING_WORDS(0x22);
struct {
u32 address_high;
u32 address_low;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
} code_loc;
INSERT_PADDING_WORDS(0x3FE);
u32 texture_const_buffer_index;
INSERT_PADDING_WORDS(0x374);
};
std::array<u32, NUM_REGS> reg_array;
};
} regs{};
struct LaunchParams {
static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40;
INSERT_PADDING_WORDS(0x8);
u32 program_start;
INSERT_PADDING_WORDS(0x2);
BitField<30, 1, u32> linked_tsc;
BitField<0, 31, u32> grid_dim_x;
union {
BitField<0, 16, u32> grid_dim_y;
BitField<16, 16, u32> grid_dim_z;
};
INSERT_PADDING_WORDS(0x3);
BitField<0, 16, u32> shared_alloc;
BitField<0, 31, u32> block_dim_x;
union {
BitField<0, 16, u32> block_dim_y;
BitField<16, 16, u32> block_dim_z;
};
union {
BitField<0, 8, u32> const_buffer_enable_mask;
BitField<29, 2, u32> cache_layout;
} memory_config;
INSERT_PADDING_WORDS(0x8);
struct {
u32 address_low;
union {
BitField<0, 8, u32> address_high;
BitField<15, 17, u32> size;
};
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) |
address_low);
}
} const_buffer_config[8];
union {
BitField<0, 20, u32> local_pos_alloc;
BitField<27, 5, u32> barrier_alloc;
};
union {
BitField<0, 20, u32> local_neg_alloc;
BitField<24, 5, u32> gpr_alloc;
};
INSERT_PADDING_WORDS(0x11);
} launch_description;
struct {
u32 write_offset = 0;
u32 copy_size = 0;
std::vector<u8> inner_buffer;
} state{};
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
"KeplerCompute Regs has wrong size");
static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32),
"KeplerCompute LaunchParams has wrong size");
/// Write the value to the register identified by method.
void CallMethod(const GPU::MethodCall& method_call);
private:
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
MemoryManager& memory_manager;
Upload::State upload_state;
void ProcessLaunch();
};
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position) \
static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4, \
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(upload, 0x60);
ASSERT_REG_POSITION(exec_upload, 0x6C);
ASSERT_REG_POSITION(data_upload, 0x6D);
ASSERT_REG_POSITION(launch, 0xAF);
ASSERT_REG_POSITION(tsc, 0x557);
ASSERT_REG_POSITION(tic, 0x55D);
ASSERT_REG_POSITION(code_loc, 0x582);
ASSERT_REG_POSITION(texture_const_buffer_index, 0x982);
ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8);
ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC);
ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11);
ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12);
ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14);
ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D);
#undef ASSERT_REG_POSITION

View File

@@ -14,9 +14,8 @@
namespace Tegra::Engines {
KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
: system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {}
KeplerMemory::~KeplerMemory() = default;
@@ -28,46 +27,18 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
switch (method_call.method) {
case KEPLERMEMORY_REG_INDEX(exec): {
ProcessExec();
upload_state.ProcessExec(regs.exec.linear != 0);
break;
}
case KEPLERMEMORY_REG_INDEX(data): {
ProcessData(method_call.argument, method_call.IsLastCall());
const bool is_last_call = method_call.IsLastCall();
upload_state.ProcessData(method_call.argument, is_last_call);
if (is_last_call) {
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
}
break;
}
}
}
void KeplerMemory::ProcessExec() {
state.write_offset = 0;
state.copy_size = regs.line_length_in * regs.line_count;
state.inner_buffer.resize(state.copy_size);
}
void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size);
state.write_offset += sub_copy_size;
if (is_last_call) {
const GPUVAddr address{regs.dest.Address()};
if (regs.exec.linear != 0) {
memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size);
} else {
UNIMPLEMENTED_IF(regs.dest.z != 0);
UNIMPLEMENTED_IF(regs.dest.depth != 1);
UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
const std::size_t dst_size = Tegra::Texture::CalculateSize(
true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
std::vector<u8> tmp_buffer(dst_size);
memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
regs.dest.y, regs.dest.BlockHeight(), state.copy_size,
state.inner_buffer.data(), tmp_buffer.data());
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
}
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
}
}
} // namespace Tegra::Engines

View File

@@ -10,6 +10,7 @@
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h"
namespace Core {
@@ -20,19 +21,20 @@ namespace Tegra {
class MemoryManager;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace Tegra::Engines {
/**
* This Engine is known as P2MF. Documentation can be found in:
* https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h
*/
#define KEPLERMEMORY_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))
class KeplerMemory final {
public:
KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager);
KeplerMemory(Core::System& system, MemoryManager& memory_manager);
~KeplerMemory();
/// Write the value to the register identified by method.
@@ -45,42 +47,7 @@ public:
struct {
INSERT_PADDING_WORDS(0x60);
u32 line_length_in;
u32 line_count;
struct {
u32 address_high;
u32 address_low;
u32 pitch;
union {
BitField<0, 4, u32> block_width;
BitField<4, 4, u32> block_height;
BitField<8, 4, u32> block_depth;
};
u32 width;
u32 height;
u32 depth;
u32 z;
u32 x;
u32 y;
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
u32 BlockWidth() const {
return 1U << block_width.Value();
}
u32 BlockHeight() const {
return 1U << block_height.Value();
}
u32 BlockDepth() const {
return 1U << block_depth.Value();
}
} dest;
Upload::Registers upload;
struct {
union {
@@ -96,28 +63,17 @@ public:
};
} regs{};
struct {
u32 write_offset = 0;
u32 copy_size = 0;
std::vector<u8> inner_buffer;
} state{};
private:
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
MemoryManager& memory_manager;
void ProcessExec();
void ProcessData(u32 data, bool is_last_call);
Upload::State upload_state;
};
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(line_length_in, 0x60);
ASSERT_REG_POSITION(line_count, 0x61);
ASSERT_REG_POSITION(dest, 0x62);
ASSERT_REG_POSITION(upload, 0x60);
ASSERT_REG_POSITION(exec, 0x6C);
ASSERT_REG_POSITION(data, 0x6D);
#undef ASSERT_REG_POSITION

View File

@@ -20,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00;
Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{
*this} {
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
InitializeRegisterDefaults();
}
@@ -253,6 +253,18 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
ProcessSyncPoint();
break;
}
case MAXWELL3D_REG_INDEX(exec_upload): {
upload_state.ProcessExec(regs.exec_upload.linear != 0);
break;
}
case MAXWELL3D_REG_INDEX(data_upload): {
const bool is_last_call = method_call.IsLastCall();
upload_state.ProcessData(method_call.argument, is_last_call);
if (is_last_call) {
dirty_flags.OnMemoryWrite();
}
break;
}
default:
break;
}

View File

@@ -14,6 +14,7 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/math_util.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h"
#include "video_core/macro_interpreter.h"
#include "video_core/textures/texture.h"
@@ -32,6 +33,12 @@ class RasterizerInterface;
namespace Tegra::Engines {
/**
* This Engine is known as GF100_3D. Documentation can be found in:
* https://github.com/envytools/envytools/blob/master/rnndb/graph/gf100_3d.xml
* https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
*/
#define MAXWELL3D_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
@@ -243,9 +250,10 @@ public:
return "10_10_10_2";
case Size::Size_11_11_10:
return "11_11_10";
default:
UNREACHABLE();
return {};
}
UNREACHABLE();
return {};
}
std::string TypeString() const {
@@ -579,7 +587,18 @@ public:
u32 bind;
} macros;
INSERT_PADDING_WORDS(0x69);
INSERT_PADDING_WORDS(0x17);
Upload::Registers upload;
struct {
union {
BitField<0, 1, u32> linear;
};
} exec_upload;
u32 data_upload;
INSERT_PADDING_WORDS(0x44);
struct {
union {
@@ -1175,6 +1194,8 @@ private:
/// Interpreter for the macro codes uploaded to the GPU.
MacroInterpreter macro_interpreter;
Upload::State upload_state;
/// Retrieves information about a specific TIC entry from the TIC buffer.
Texture::TICEntry GetTICEntry(u32 tic_index) const;
@@ -1218,6 +1239,9 @@ private:
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(macros, 0x45);
ASSERT_REG_POSITION(upload, 0x60);
ASSERT_REG_POSITION(exec_upload, 0x6C);
ASSERT_REG_POSITION(data_upload, 0x6D);
ASSERT_REG_POSITION(sync_info, 0xB2);
ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
ASSERT_REG_POSITION(rt, 0x200);

View File

@@ -83,57 +83,66 @@ void MaxwellDMA::HandleCopy() {
ASSERT(regs.exec.enable_2d == 1);
const std::size_t copy_size = regs.x_count * regs.y_count;
auto source_ptr{memory_manager.GetPointer(source)};
auto dst_ptr{memory_manager.GetPointer(dest)};
if (!source_ptr) {
LOG_ERROR(HW_GPU, "source_ptr is invalid");
return;
}
if (!dst_ptr) {
LOG_ERROR(HW_GPU, "dst_ptr is invalid");
return;
}
const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
// TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
// copying.
rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
// We have to invalidate the destination region to evict any outdated surfaces from the
// cache. We do this before actually writing the new data because the destination address
// might contain a dirty surface that will have to be written back to memory.
rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
};
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
ASSERT(regs.src_params.size_z == 1);
// If the input is tiled and the output is linear, deswizzle the input and copy it over.
const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
const std::size_t src_size = Texture::CalculateSize(
true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y,
copy_size * src_bytes_per_pixel);
const std::size_t dst_size = regs.dst_pitch * regs.y_count;
if (read_buffer.size() < src_size) {
read_buffer.resize(src_size);
}
if (write_buffer.size() < dst_size) {
write_buffer.resize(dst_size);
}
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
regs.src_params.BlockHeight(), regs.src_params.pos_x,
regs.src_params.pos_y);
regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(),
write_buffer.data(), regs.src_params.BlockHeight(),
regs.src_params.pos_x, regs.src_params.pos_y);
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
} else {
ASSERT(regs.dst_params.size_z == 1);
ASSERT(regs.src_pitch == regs.x_count);
ASSERT(regs.dst_params.BlockDepth() == 1);
const u32 src_bpp = regs.src_pitch / regs.x_count;
const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;
FlushAndInvalidate(regs.src_pitch * regs.y_count,
regs.dst_params.size_x * regs.dst_params.size_y * src_bpp);
const std::size_t dst_size = Texture::CalculateSize(
true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
const std::size_t dst_layer_size = Texture::CalculateSize(
true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
const std::size_t src_size = regs.src_pitch * regs.y_count;
if (read_buffer.size() < src_size) {
read_buffer.resize(src_size);
}
if (write_buffer.size() < dst_size) {
write_buffer.resize(dst_size);
}
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
// If the input is linear and the output is tiled, swizzle the input and copy it over.
Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
src_bytes_per_pixel,
write_buffer.data() + dst_layer_size * regs.dst_params.pos_z,
read_buffer.data(), regs.dst_params.BlockHeight());
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
}
}

View File

@@ -6,6 +6,7 @@
#include <array>
#include <cstddef>
#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
@@ -25,6 +26,11 @@ class RasterizerInterface;
namespace Tegra::Engines {
/**
* This Engine is known as GK104_Copy. Documentation can be found in:
* https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
*/
class MaxwellDMA final {
public:
explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
@@ -63,6 +69,16 @@ public:
static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
enum class ComponentMode : u32 {
Src0 = 0,
Src1 = 1,
Src2 = 2,
Src3 = 3,
Const0 = 4,
Const1 = 5,
Zero = 6,
};
enum class CopyMode : u32 {
None = 0,
Unk1 = 1,
@@ -128,7 +144,26 @@ public:
u32 x_count;
u32 y_count;
INSERT_PADDING_WORDS(0xBB);
INSERT_PADDING_WORDS(0xB8);
u32 const0;
u32 const1;
union {
BitField<0, 4, ComponentMode> component0;
BitField<4, 4, ComponentMode> component1;
BitField<8, 4, ComponentMode> component2;
BitField<12, 4, ComponentMode> component3;
BitField<16, 2, u32> component_size;
BitField<20, 3, u32> src_num_components;
BitField<24, 3, u32> dst_num_components;
u32 SrcBytePerPixel() const {
return src_num_components.Value() * component_size.Value();
}
u32 DstBytePerPixel() const {
return dst_num_components.Value() * component_size.Value();
}
} swizzle_config;
Parameters dst_params;
@@ -149,6 +184,9 @@ private:
MemoryManager& memory_manager;
std::vector<u8> read_buffer;
std::vector<u8> write_buffer;
/// Performs the copy from the source buffer to the destination buffer as configured in the
/// registers.
void HandleCopy();
@@ -165,6 +203,9 @@ ASSERT_REG_POSITION(src_pitch, 0x104);
ASSERT_REG_POSITION(dst_pitch, 0x105);
ASSERT_REG_POSITION(x_count, 0x106);
ASSERT_REG_POSITION(y_count, 0x107);
ASSERT_REG_POSITION(const0, 0x1C0);
ASSERT_REG_POSITION(const1, 0x1C1);
ASSERT_REG_POSITION(swizzle_config, 0x1C2);
ASSERT_REG_POSITION(dst_params, 0x1C3);
ASSERT_REG_POSITION(src_params, 0x1CA);

View File

@@ -35,9 +35,9 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
}
GPU::~GPU() = default;

View File

@@ -44,7 +44,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
renderer.Rasterizer().FlushRegion(data->addr, data->size);
} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
} else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) {
} else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
return;
} else {
UNREACHABLE();

View File

@@ -25,6 +25,8 @@ MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : raste
UpdatePageTableForVMA(initial_vma);
}
MemoryManager::~MemoryManager() = default;
GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
const u64 aligned_size{Common::AlignUp(size, page_size)};
const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
@@ -199,11 +201,11 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
return {};
}
bool MemoryManager::IsBlockContinous(const GPUVAddr start, const std::size_t size) {
bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const {
const GPUVAddr end = start + size;
const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start));
const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end));
const std::size_t range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
const auto range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
return range == size;
}

View File

@@ -47,7 +47,8 @@ struct VirtualMemoryArea {
class MemoryManager final {
public:
MemoryManager(VideoCore::RasterizerInterface& rasterizer);
explicit MemoryManager(VideoCore::RasterizerInterface& rasterizer);
~MemoryManager();
GPUVAddr AllocateSpace(u64 size, u64 align);
GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
@@ -65,18 +66,18 @@ public:
u8* GetPointer(GPUVAddr addr);
const u8* GetPointer(GPUVAddr addr) const;
// Returns true if the block is continous in host memory, false otherwise
bool IsBlockContinous(const GPUVAddr start, const std::size_t size);
/// Returns true if the block is continuous in host memory, false otherwise
bool IsBlockContinuous(GPUVAddr start, std::size_t size) const;
/**
* ReadBlock and WriteBlock are full read and write operations over virtual
* GPU Memory. It's important to use these when GPU memory may not be continous
* GPU Memory. It's important to use these when GPU memory may not be continuous
* in the Host Memory counterpart. Note: This functions cause Host GPU Memory
* Flushes and Invalidations, respectively to each operation.
*/
void ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const;
void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size);
void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size);
void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
/**
* ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
@@ -88,9 +89,9 @@ public:
* WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture
* being flushed.
*/
void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const;
void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size);
void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size);
void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
private:
using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
@@ -111,10 +112,10 @@ private:
/**
* Maps an unmanaged host memory pointer at a given address.
*
* @param target The guest address to start the mapping at.
* @param memory The memory to be mapped.
* @param size Size of the mapping.
* @param state MemoryState tag to attach to the VMA.
* @param target The guest address to start the mapping at.
* @param memory The memory to be mapped.
* @param size Size of the mapping in bytes.
* @param backing_addr The base address of the range to back this mapping.
*/
VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr);
@@ -124,7 +125,7 @@ private:
/// Converts a VMAHandle to a mutable VMAIter.
VMAIter StripIterConstness(const VMAHandle& iter);
/// Marks as the specfied VMA as allocated.
/// Marks as the specified VMA as allocated.
VMAIter Allocate(VMAIter vma);
/**

View File

@@ -37,9 +37,6 @@ public:
/// Gets the size of the shader in guest memory, required for cache management
virtual std::size_t GetSizeInBytes() const = 0;
/// Wriets any cached resources back to memory
virtual void Flush() = 0;
/// Sets whether the cached object should be considered registered
void SetIsRegistered(bool registered) {
is_registered = registered;
@@ -158,6 +155,8 @@ protected:
return ++modified_ticks;
}
virtual void FlushObjectInner(const T& object) = 0;
/// Flushes the specified object, updating appropriate cache state as needed
void FlushObject(const T& object) {
std::lock_guard lock{mutex};
@@ -165,7 +164,7 @@ protected:
if (!object->IsDirty()) {
return;
}
object->Flush();
FlushObjectInner(object);
object->MarkAsModified(false, *this);
}

View File

@@ -42,9 +42,6 @@ public:
return alignment;
}
// We do not have to flush this cache as things in it are never modified by us.
void Flush() override {}
private:
VAddr cpu_addr{};
std::size_t size{};
@@ -75,6 +72,9 @@ public:
protected:
void AlignBuffer(std::size_t alignment);
// We do not have to flush this cache as things in it are never modified by us.
void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
private:
OGLStreamBuffer stream_buffer;

View File

@@ -46,7 +46,7 @@ public:
/// Reloads the global region from guest memory
void Reload(u32 size_);
void Flush() override;
void Flush();
private:
VAddr cpu_addr{};
@@ -65,6 +65,11 @@ public:
GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
protected:
void FlushObjectInner(const GlobalRegion& object) override {
object->Flush();
}
private:
GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size);

View File

@@ -261,8 +261,8 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
// MakeQuadArray always generates u32 indexes
params.index_format = GL_UNSIGNED_INT;
params.count = (regs.vertex_buffer.count / 4) * 6;
params.index_buffer_offset =
primitive_assembler.MakeQuadArray(regs.vertex_buffer.first, params.count);
params.index_buffer_offset = primitive_assembler.MakeQuadArray(
regs.vertex_buffer.first, regs.vertex_buffer.count);
}
return params;
}
@@ -305,6 +305,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
case Maxwell::ShaderProgram::Geometry:
shader_program_manager->UseTrivialGeometryShader();
break;
default:
break;
}
continue;
}
@@ -920,8 +922,8 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
viewport.y = viewport_rect.bottom;
viewport.width = viewport_rect.GetWidth();
viewport.height = viewport_rect.GetHeight();
viewport.depth_range_far = regs.viewports[i].depth_range_far;
viewport.depth_range_near = regs.viewports[i].depth_range_near;
viewport.depth_range_far = src.depth_range_far;
viewport.depth_range_near = src.depth_range_near;
}
state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0;
state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0;

View File

@@ -628,9 +628,11 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
}
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
void CachedSurface::LoadGLBuffer() {
void CachedSurface::LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) {
MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
gl_buffer.resize(params.max_mip_level);
auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
if (gl_buffer.size() < params.max_mip_level)
gl_buffer.resize(params.max_mip_level);
for (u32 i = 0; i < params.max_mip_level; i++)
gl_buffer[i].resize(params.GetMipmapSizeGL(i));
if (params.is_tiled) {
@@ -671,13 +673,13 @@ void CachedSurface::LoadGLBuffer() {
}
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
void CachedSurface::FlushGLBuffer() {
void CachedSurface::FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) {
MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented");
auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
// OpenGL temporary buffer needs to be big enough to store raw texture size
gl_buffer.resize(1);
gl_buffer[0].resize(GetSizeInBytes());
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
@@ -713,10 +715,12 @@ void CachedSurface::FlushGLBuffer() {
}
}
void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
GLuint draw_fb_handle) {
void CachedSurface::UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
GLuint read_fb_handle, GLuint draw_fb_handle) {
const auto& rect{params.GetRect(mip_map)};
auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
// Load data from memory to the surface
const auto x0 = static_cast<GLint>(rect.left);
const auto y0 = static_cast<GLint>(rect.bottom);
@@ -801,7 +805,6 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
tuple.type, &gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::TextureCubemap: {
std::size_t start = buffer_offset;
for (std::size_t face = 0; face < params.depth; ++face) {
glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face),
static_cast<GLsizei>(rect.GetWidth()),
@@ -845,11 +848,12 @@ void CachedSurface::EnsureTextureDiscrepantView() {
}
MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
void CachedSurface::UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem,
GLuint read_fb_handle, GLuint draw_fb_handle) {
MICROPROFILE_SCOPE(OpenGL_TextureUL);
for (u32 i = 0; i < params.max_mip_level; i++)
UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle);
UploadGLMipmapTexture(res_cache_tmp_mem, i, read_fb_handle, draw_fb_handle);
}
void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
@@ -929,8 +933,8 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
}
void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
surface->LoadGLBuffer();
surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
surface->LoadGLBuffer(temporal_memory);
surface->UploadGLTexture(temporal_memory, read_framebuffer.handle, draw_framebuffer.handle);
surface->MarkAsModified(false, *this);
surface->MarkForReload(false);
}

View File

@@ -355,6 +355,12 @@ namespace OpenGL {
class RasterizerOpenGL;
// This is used to store temporary big buffers,
// instead of creating/destroying all the time
struct RasterizerTemporaryMemory {
std::vector<std::vector<u8>> gl_buffer;
};
class CachedSurface final : public RasterizerCacheObject {
public:
explicit CachedSurface(const SurfaceParams& params);
@@ -371,10 +377,6 @@ public:
return memory_size;
}
void Flush() override {
FlushGLBuffer();
}
const OGLTexture& Texture() const {
return texture;
}
@@ -397,11 +399,12 @@ public:
}
// Read/Write data in Switch memory to/from gl_buffer
void LoadGLBuffer();
void FlushGLBuffer();
void LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);
void FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);
// Upload data in gl_buffer to this surface's texture
void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
void UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, GLuint read_fb_handle,
GLuint draw_fb_handle);
void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
Tegra::Texture::SwizzleSource swizzle_y,
@@ -429,13 +432,13 @@ public:
}
private:
void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
void UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
GLuint read_fb_handle, GLuint draw_fb_handle);
void EnsureTextureDiscrepantView();
OGLTexture texture;
OGLTexture discrepant_view;
std::vector<std::vector<u8>> gl_buffer;
SurfaceParams params{};
GLenum gl_target{};
GLenum gl_internal_format{};
@@ -473,6 +476,11 @@ public:
void SignalPreDrawCall();
void SignalPostDrawCall();
protected:
void FlushObjectInner(const Surface& object) override {
object->FlushGLBuffer(temporal_memory);
}
private:
void LoadSurface(const Surface& surface);
Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true);
@@ -519,6 +527,8 @@ private:
std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
Surface last_depth_buffer;
RasterizerTemporaryMemory temporal_memory;
using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
using SurfaceInterval = typename SurfaceIntervalCache::interval_type;

View File

@@ -345,7 +345,7 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
const Device& device)
: RasterizerCache{rasterizer}, disk_cache{system}, device{device} {}
: RasterizerCache{rasterizer}, device{device}, disk_cache{system} {}
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
@@ -363,6 +363,10 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
if (stop_loading)
return;
// Track if precompiled cache was altered during loading to know if we have to serialize the
// virtual precompiled cache file back to the hard drive
bool precompiled_cache_altered = false;
// Build shaders
if (callback)
callback(VideoCore::LoadCallbackStage::Build, 0, usages.size());
@@ -384,6 +388,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
if (!shader) {
// Invalidate the precompiled cache if a shader dumped shader was rejected
disk_cache.InvalidatePrecompiled();
precompiled_cache_altered = true;
dumps.clear();
}
}
@@ -405,8 +410,13 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
if (dumps.find(usage) == dumps.end()) {
const auto& program = precompiled_programs.at(usage);
disk_cache.SaveDump(usage, program->handle);
precompiled_cache_altered = true;
}
}
if (precompiled_cache_altered) {
disk_cache.SaveVirtualPrecompiledFile();
}
}
CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(

View File

@@ -57,9 +57,6 @@ public:
return shader_length;
}
// We do not have to flush this cache as things in it are never modified by us.
void Flush() override {}
/// Gets the shader entries for the shader
const GLShader::ShaderEntries& GetShaderEntries() const {
return entries;
@@ -123,6 +120,10 @@ public:
/// Gets the current specified shader stage program
Shader GetStageProgram(Maxwell::ShaderProgram program);
protected:
// We do not have to flush this cache as things in it are never modified by us.
void FlushObjectInner(const Shader& object) override {}
private:
std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders(
const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,

View File

@@ -871,17 +871,6 @@ private:
return {};
}
std::string Composite(Operation operation) {
std::string value = "vec4(";
for (std::size_t i = 0; i < 4; ++i) {
value += Visit(operation[i]);
if (i < 3)
value += ", ";
}
value += ')';
return value;
}
template <Type type>
std::string Add(Operation operation) {
return GenerateBinaryInfix(operation, "+", type, type, type);

View File

@@ -104,7 +104,8 @@ bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
return true;
}
ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system)
: system{system}, precompiled_cache_virtual_file_offset{0} {}
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
ShaderDiskCacheOpenGL::LoadTransferable() {
@@ -177,6 +178,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
return {};
}
}
return {{raws, usages}};
}
@@ -208,59 +210,64 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() {
std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
// Read compressed file from disk and decompress to virtual precompiled cache file
std::vector<u8> compressed(file.GetSize());
file.ReadBytes(compressed.data(), compressed.size());
const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed);
SaveArrayToPrecompiled(decompressed.data(), decompressed.size());
precompiled_cache_virtual_file_offset = 0;
ShaderCacheVersionHash file_hash{};
if (file.ReadArray(file_hash.data(), file_hash.size()) != file_hash.size()) {
if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
precompiled_cache_virtual_file_offset = 0;
return {};
}
if (GetShaderCacheVersionHash() != file_hash) {
LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
precompiled_cache_virtual_file_offset = 0;
return {};
}
std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> dumps;
while (file.Tell() < file.GetSize()) {
while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
PrecompiledEntryKind kind{};
if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
if (!LoadObjectFromPrecompiled(kind)) {
return {};
}
switch (kind) {
case PrecompiledEntryKind::Decompiled: {
u64 unique_identifier{};
if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64))
if (!LoadObjectFromPrecompiled(unique_identifier)) {
return {};
}
const auto entry = LoadDecompiledEntry(file);
if (!entry)
const auto entry = LoadDecompiledEntry();
if (!entry) {
return {};
}
decompiled.insert({unique_identifier, std::move(*entry)});
break;
}
case PrecompiledEntryKind::Dump: {
ShaderDiskCacheUsage usage;
if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage))
if (!LoadObjectFromPrecompiled(usage)) {
return {};
}
ShaderDiskCacheDump dump;
if (file.ReadBytes(&dump.binary_format, sizeof(u32)) != sizeof(u32))
if (!LoadObjectFromPrecompiled(dump.binary_format)) {
return {};
}
u32 binary_length{};
u32 compressed_size{};
if (file.ReadBytes(&binary_length, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&compressed_size, sizeof(u32)) != sizeof(u32)) {
if (!LoadObjectFromPrecompiled(binary_length)) {
return {};
}
std::vector<u8> compressed_binary(compressed_size);
if (file.ReadArray(compressed_binary.data(), compressed_binary.size()) !=
compressed_binary.size()) {
return {};
}
dump.binary = Common::Compression::DecompressDataZSTD(compressed_binary);
if (dump.binary.empty()) {
dump.binary.resize(binary_length);
if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
return {};
}
@@ -274,45 +281,41 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
return {{decompiled, dumps}};
}
std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry(
FileUtil::IOFile& file) {
std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry() {
u32 code_size{};
u32 compressed_code_size{};
if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&compressed_code_size, sizeof(u32)) != sizeof(u32)) {
if (!LoadObjectFromPrecompiled(code_size)) {
return {};
}
std::vector<u8> compressed_code(compressed_code_size);
if (file.ReadArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
std::vector<u8> code(code_size);
if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
return {};
}
const std::vector<u8> code = Common::Compression::DecompressDataZSTD(compressed_code);
if (code.empty()) {
return {};
}
ShaderDiskCacheDecompiled entry;
entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size);
u32 const_buffers_count{};
if (file.ReadBytes(&const_buffers_count, sizeof(u32)) != sizeof(u32))
if (!LoadObjectFromPrecompiled(const_buffers_count)) {
return {};
}
for (u32 i = 0; i < const_buffers_count; ++i) {
u32 max_offset{};
u32 index{};
u8 is_indirect{};
if (file.ReadBytes(&max_offset, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&index, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&is_indirect, sizeof(u8)) != sizeof(u8)) {
if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(is_indirect)) {
return {};
}
entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index);
}
u32 samplers_count{};
if (file.ReadBytes(&samplers_count, sizeof(u32)) != sizeof(u32))
if (!LoadObjectFromPrecompiled(samplers_count)) {
return {};
}
for (u32 i = 0; i < samplers_count; ++i) {
u64 offset{};
u64 index{};
@@ -320,12 +323,9 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
u8 is_array{};
u8 is_shadow{};
u8 is_bindless{};
if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) ||
file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) ||
file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) ||
file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8) ||
file.ReadBytes(&is_bindless, sizeof(u8)) != sizeof(u8)) {
if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) ||
!LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) {
return {};
}
entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset),
@@ -335,17 +335,17 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
}
u32 global_memory_count{};
if (file.ReadBytes(&global_memory_count, sizeof(u32)) != sizeof(u32))
if (!LoadObjectFromPrecompiled(global_memory_count)) {
return {};
}
for (u32 i = 0; i < global_memory_count; ++i) {
u32 cbuf_index{};
u32 cbuf_offset{};
u8 is_read{};
u8 is_written{};
if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&is_read, sizeof(u8)) != sizeof(u8) ||
file.ReadBytes(&is_written, sizeof(u8)) != sizeof(u8)) {
if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) ||
!LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
return {};
}
entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0,
@@ -354,74 +354,81 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
for (auto& clip_distance : entry.entries.clip_distances) {
u8 clip_distance_raw{};
if (file.ReadBytes(&clip_distance_raw, sizeof(u8)) != sizeof(u8))
if (!LoadObjectFromPrecompiled(clip_distance_raw))
return {};
clip_distance = clip_distance_raw != 0;
}
u64 shader_length{};
if (file.ReadBytes(&shader_length, sizeof(u64)) != sizeof(u64))
if (!LoadObjectFromPrecompiled(shader_length)) {
return {};
}
entry.entries.shader_length = static_cast<std::size_t>(shader_length);
return entry;
}
bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier,
const std::string& code,
const std::vector<u8>& compressed_code,
bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std::string& code,
const GLShader::ShaderEntries& entries) {
if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Decompiled)) != 1 ||
file.WriteObject(unique_identifier) != 1 ||
file.WriteObject(static_cast<u32>(code.size())) != 1 ||
file.WriteObject(static_cast<u32>(compressed_code.size())) != 1 ||
file.WriteArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Decompiled)) ||
!SaveObjectToPrecompiled(unique_identifier) ||
!SaveObjectToPrecompiled(static_cast<u32>(code.size())) ||
!SaveArrayToPrecompiled(code.data(), code.size())) {
return false;
}
if (file.WriteObject(static_cast<u32>(entries.const_buffers.size())) != 1)
if (!SaveObjectToPrecompiled(static_cast<u32>(entries.const_buffers.size()))) {
return false;
}
for (const auto& cbuf : entries.const_buffers) {
if (file.WriteObject(static_cast<u32>(cbuf.GetMaxOffset())) != 1 ||
file.WriteObject(static_cast<u32>(cbuf.GetIndex())) != 1 ||
file.WriteObject(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0)) != 1) {
if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) ||
!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) ||
!SaveObjectToPrecompiled(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0))) {
return false;
}
}
if (file.WriteObject(static_cast<u32>(entries.samplers.size())) != 1)
if (!SaveObjectToPrecompiled(static_cast<u32>(entries.samplers.size()))) {
return false;
}
for (const auto& sampler : entries.samplers) {
if (file.WriteObject(static_cast<u64>(sampler.GetOffset())) != 1 ||
file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 ||
file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 ||
file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 ||
file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1 ||
file.WriteObject(static_cast<u8>(sampler.IsBindless() ? 1 : 0)) != 1) {
if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) ||
!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) ||
!SaveObjectToPrecompiled(static_cast<u8>(sampler.IsArray() ? 1 : 0)) ||
!SaveObjectToPrecompiled(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) ||
!SaveObjectToPrecompiled(static_cast<u8>(sampler.IsBindless() ? 1 : 0))) {
return false;
}
}
if (file.WriteObject(static_cast<u32>(entries.global_memory_entries.size())) != 1)
if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) {
return false;
}
for (const auto& gmem : entries.global_memory_entries) {
if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 ||
file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1 ||
file.WriteObject(static_cast<u8>(gmem.IsRead() ? 1 : 0)) != 1 ||
file.WriteObject(static_cast<u8>(gmem.IsWritten() ? 1 : 0)) != 1) {
if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) ||
!SaveObjectToPrecompiled(static_cast<u8>(gmem.IsRead() ? 1 : 0)) ||
!SaveObjectToPrecompiled(static_cast<u8>(gmem.IsWritten() ? 1 : 0))) {
return false;
}
}
for (const bool clip_distance : entries.clip_distances) {
if (file.WriteObject(static_cast<u8>(clip_distance ? 1 : 0)) != 1)
if (!SaveObjectToPrecompiled(static_cast<u8>(clip_distance ? 1 : 0))) {
return false;
}
}
return file.WriteObject(static_cast<u64>(entries.shader_length)) == 1;
if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) {
return false;
}
return true;
}
void ShaderDiskCacheOpenGL::InvalidateTransferable() const {
void ShaderDiskCacheOpenGL::InvalidateTransferable() {
if (!FileUtil::Delete(GetTransferablePath())) {
LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
GetTransferablePath());
@@ -429,7 +436,10 @@ void ShaderDiskCacheOpenGL::InvalidateTransferable() const {
InvalidatePrecompiled();
}
void ShaderDiskCacheOpenGL::InvalidatePrecompiled() const {
void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
// Clear virtaul precompiled cache file
precompiled_cache_virtual_file.Resize(0);
if (!FileUtil::Delete(GetPrecompiledPath())) {
LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath());
}
@@ -465,7 +475,10 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
auto& usages{it->second};
ASSERT(usages.find(usage) == usages.end());
if (usages.find(usage) != usages.end()) {
// Skip this variant since the shader is already stored.
return;
}
usages.insert(usage);
FileUtil::IOFile file = AppendTransferableFile();
@@ -485,22 +498,13 @@ void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::str
if (!IsUsable())
return;
const std::vector<u8> compressed_code{Common::Compression::CompressDataZSTDDefault(
reinterpret_cast<const u8*>(code.data()), code.size())};
if (compressed_code.empty()) {
LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}",
unique_identifier);
return;
if (precompiled_cache_virtual_file.GetSize() == 0) {
SavePrecompiledHeaderToVirtualPrecompiledCache();
}
FileUtil::IOFile file = AppendPrecompiledFile();
if (!file.IsOpen())
return;
if (!SaveDecompiledFile(file, unique_identifier, code, compressed_code, entries)) {
if (!SaveDecompiledFile(unique_identifier, code, entries)) {
LOG_ERROR(Render_OpenGL,
"Failed to save decompiled entry to the precompiled file - removing");
file.Close();
InvalidatePrecompiled();
}
}
@@ -516,28 +520,13 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
std::vector<u8> binary(binary_length);
glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
const std::vector<u8> compressed_binary =
Common::Compression::CompressDataZSTDDefault(binary.data(), binary.size());
if (compressed_binary.empty()) {
LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}",
usage.unique_identifier);
return;
}
FileUtil::IOFile file = AppendPrecompiledFile();
if (!file.IsOpen())
return;
if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Dump)) != 1 ||
file.WriteObject(usage) != 1 || file.WriteObject(static_cast<u32>(binary_format)) != 1 ||
file.WriteObject(static_cast<u32>(binary_length)) != 1 ||
file.WriteObject(static_cast<u32>(compressed_binary.size())) != 1 ||
file.WriteArray(compressed_binary.data(), compressed_binary.size()) !=
compressed_binary.size()) {
if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Dump)) ||
!SaveObjectToPrecompiled(usage) ||
!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
!SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
!SaveArrayToPrecompiled(binary.data(), binary.size())) {
LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing",
usage.unique_identifier);
file.Close();
InvalidatePrecompiled();
return;
}
@@ -570,28 +559,33 @@ FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
return file;
}
FileUtil::IOFile ShaderDiskCacheOpenGL::AppendPrecompiledFile() const {
if (!EnsureDirectories())
return {};
void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
const auto hash{GetShaderCacheVersionHash()};
if (!SaveArrayToPrecompiled(hash.data(), hash.size())) {
LOG_ERROR(
Render_OpenGL,
"Failed to write precompiled cache version hash to virtual precompiled cache file");
}
}
void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
precompiled_cache_virtual_file_offset = 0;
const std::vector<u8>& uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
const std::vector<u8>& compressed =
Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
const auto precompiled_path{GetPrecompiledPath()};
const bool existed = FileUtil::Exists(precompiled_path);
FileUtil::IOFile file(precompiled_path, "wb");
FileUtil::IOFile file(precompiled_path, "ab");
if (!file.IsOpen()) {
LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path);
return {};
return;
}
if (!existed || file.GetSize() == 0) {
const auto hash{GetShaderCacheVersionHash()};
if (file.WriteArray(hash.data(), hash.size()) != hash.size()) {
LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version hash in path={}",
precompiled_path);
return {};
}
if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) {
LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
precompiled_path);
return;
}
return file;
}
bool ShaderDiskCacheOpenGL::EnsureDirectories() const {

View File

@@ -16,6 +16,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "core/file_sys/vfs_vector.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
@@ -172,10 +173,10 @@ public:
LoadPrecompiled();
/// Removes the transferable (and precompiled) cache file.
void InvalidateTransferable() const;
void InvalidateTransferable();
/// Removes the precompiled cache file.
void InvalidatePrecompiled() const;
/// Removes the precompiled cache file and clears virtual precompiled cache file.
void InvalidatePrecompiled();
/// Saves a raw dump to the transferable file. Checks for collisions.
void SaveRaw(const ShaderDiskCacheRaw& entry);
@@ -190,18 +191,21 @@ public:
/// Saves a dump entry to the precompiled file. Does not check for collisions.
void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);
/// Serializes virtual precompiled shader cache file to real file
void SaveVirtualPrecompiledFile();
private:
/// Loads the transferable cache. Returns empty on failure.
std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
LoadPrecompiledFile(FileUtil::IOFile& file);
/// Loads a decompiled cache entry from the passed file. Returns empty on failure.
std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry(FileUtil::IOFile& file);
/// Loads a decompiled cache entry from m_precompiled_cache_virtual_file. Returns empty on
/// failure.
std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry();
/// Saves a decompiled entry to the passed file. Returns true on success.
bool SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier, const std::string& code,
const std::vector<u8>& compressed_code,
bool SaveDecompiledFile(u64 unique_identifier, const std::string& code,
const GLShader::ShaderEntries& entries);
/// Returns if the cache can be used
@@ -210,8 +214,8 @@ private:
/// Opens current game's transferable file and write it's header if it doesn't exist
FileUtil::IOFile AppendTransferableFile() const;
/// Opens current game's precompiled file and write it's header if it doesn't exist
FileUtil::IOFile AppendPrecompiledFile() const;
/// Save precompiled header to precompiled_cache_in_memory
void SavePrecompiledHeaderToVirtualPrecompiledCache();
/// Create shader disk cache directories. Returns true on success.
bool EnsureDirectories() const;
@@ -234,10 +238,42 @@ private:
/// Get current game's title id
std::string GetTitleID() const;
template <typename T>
bool SaveArrayToPrecompiled(const T* data, std::size_t length) {
const std::size_t write_length = precompiled_cache_virtual_file.WriteArray(
data, length, precompiled_cache_virtual_file_offset);
precompiled_cache_virtual_file_offset += write_length;
return write_length == sizeof(T) * length;
}
template <typename T>
bool LoadArrayFromPrecompiled(T* data, std::size_t length) {
const std::size_t read_length = precompiled_cache_virtual_file.ReadArray(
data, length, precompiled_cache_virtual_file_offset);
precompiled_cache_virtual_file_offset += read_length;
return read_length == sizeof(T) * length;
}
template <typename T>
bool SaveObjectToPrecompiled(const T& object) {
return SaveArrayToPrecompiled(&object, 1);
}
template <typename T>
bool LoadObjectFromPrecompiled(T& object) {
return LoadArrayFromPrecompiled(&object, 1);
}
// Copre system
Core::System& system;
// Stored transferable shaders
std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
// Stores whole precompiled cache which will be read from or saved to the precompiled chache
// file
FileSys::VectorVfsFile precompiled_cache_virtual_file;
// Stores the current offset of the precompiled cache file for IO purposes
std::size_t precompiled_cache_virtual_file_offset;
// The cache has been loaded at boot
bool tried_to_load{};
};

View File

@@ -27,8 +27,7 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
switch (attrib.type) {
case Maxwell::VertexAttribute::Type::UnsignedInt:
case Maxwell::VertexAttribute::Type::UnsignedNorm: {
case Maxwell::VertexAttribute::Type::UnsignedNorm:
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8:
case Maxwell::VertexAttribute::Size::Size_8_8:
@@ -47,16 +46,13 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
return GL_UNSIGNED_INT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_UNSIGNED_INT_2_10_10_10_REV;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
UNREACHABLE();
return {};
}
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
UNREACHABLE();
return {};
}
case Maxwell::VertexAttribute::Type::SignedInt:
case Maxwell::VertexAttribute::Type::SignedNorm: {
case Maxwell::VertexAttribute::Type::SignedNorm:
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8:
case Maxwell::VertexAttribute::Size::Size_8_8:
@@ -75,14 +71,12 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
return GL_INT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_INT_2_10_10_10_REV;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
UNREACHABLE();
return {};
}
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
UNREACHABLE();
return {};
}
case Maxwell::VertexAttribute::Type::Float: {
case Maxwell::VertexAttribute::Type::Float:
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_16:
case Maxwell::VertexAttribute::Size::Size_16_16:
@@ -94,13 +88,16 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
case Maxwell::VertexAttribute::Size::Size_32_32_32:
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return GL_FLOAT;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
UNREACHABLE();
return {};
}
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
UNREACHABLE();
return {};
}
}
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
UNREACHABLE();
return {};
}
inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
@@ -129,10 +126,11 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
return GL_TRIANGLES;
case Maxwell::PrimitiveTopology::TriangleStrip:
return GL_TRIANGLE_STRIP;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
UNREACHABLE();
return {};
}
LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
UNREACHABLE();
return {};
}
inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
@@ -186,9 +184,10 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
} else {
return GL_MIRROR_CLAMP_TO_EDGE;
}
default:
LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
return GL_REPEAT;
}
LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
return GL_REPEAT;
}
inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {

View File

@@ -62,9 +62,10 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
case Tegra::Texture::WrapMode::MirrorOnceBorder:
UNIMPLEMENTED();
return vk::SamplerAddressMode::eMirrorClampToEdge;
default:
UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
return {};
}
UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
return {};
}
vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
@@ -225,9 +226,10 @@ vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
return vk::PrimitiveTopology::eTriangleList;
case Maxwell::PrimitiveTopology::TriangleStrip:
return vk::PrimitiveTopology::eTriangleStrip;
default:
UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
return {};
}
UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
return {};
}
vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {

View File

@@ -49,9 +49,6 @@ public:
return alignment;
}
// We do not have to flush this cache as things in it are never modified by us.
void Flush() override {}
private:
VAddr cpu_addr{};
std::size_t size{};
@@ -87,6 +84,10 @@ public:
return buffer_handle;
}
protected:
// We do not have to flush this cache as things in it are never modified by us.
void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
private:
void AlignBuffer(std::size_t alignment);

View File

@@ -315,7 +315,6 @@ private:
constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry",
"overflow"};
for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
const auto flag_code = static_cast<InternalFlag>(flag);
const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
}

View File

@@ -116,6 +116,8 @@ ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {
// Continue scanning for an exit method.
break;
}
default:
break;
}
}
return exit_method = ExitMethod::AlwaysReturn;
@@ -206,4 +208,4 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
return pc + 1;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -9,6 +9,7 @@
namespace VideoCommon::Shader {
using Tegra::Shader::HalfType;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
@@ -22,7 +23,6 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
}
}
UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented");
const bool negate_a =
opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
@@ -32,35 +32,37 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a);
Node op_b = [&]() {
auto [type_b, op_b] = [&]() -> std::tuple<HalfType, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::HADD2_C:
case OpCode::Id::HMUL2_C:
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
case OpCode::Id::HADD2_R:
case OpCode::Id::HMUL2_R:
return GetRegister(instr.gpr20);
return {instr.alu_half.type_b, GetRegister(instr.gpr20)};
default:
UNREACHABLE();
return Immediate(0);
return {HalfType::F32, Immediate(0)};
}
}();
op_b = UnpackHalfFloat(op_b, instr.alu_half.type_b);
op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
op_b = UnpackHalfFloat(op_b, type_b);
// redeclaration to avoid a bug in clang with reusing local bindings in lambdas
Node op_b_alt = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
Node value = [&]() {
switch (opcode->get().GetId()) {
case OpCode::Id::HADD2_C:
case OpCode::Id::HADD2_R:
return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
return Operation(OperationCode::HAdd, PRECISE, op_a, op_b_alt);
case OpCode::Id::HMUL2_C:
case OpCode::Id::HMUL2_R:
return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
return Operation(OperationCode::HMul, PRECISE, op_a, op_b_alt);
default:
UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
return Immediate(0);
}
}();
value = GetSaturatedHalfFloat(value, instr.alu_half.saturate);
value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);
SetRegister(bb, instr.gpr0, value);
@@ -68,4 +70,4 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -120,10 +120,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
return Operation(OperationCode::FCeil, PRECISE, value);
case Tegra::Shader::F2fRoundingOp::Trunc:
return Operation(OperationCode::FTrunc, PRECISE, value);
default:
UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
static_cast<u32>(instr.conversion.f2f.rounding.Value()));
return Immediate(0);
}
UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
static_cast<u32>(instr.conversion.f2f.rounding.Value()));
return Immediate(0);
}();
value = GetSaturatedFloat(value, instr.alu.saturate_d);

View File

@@ -34,15 +34,14 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
case OpCode::Id::HFMA2_CR:
neg_b = instr.hfma2.negate_b;
neg_c = instr.hfma2.negate_c;
return {instr.hfma2.saturate, instr.hfma2.type_b,
return {instr.hfma2.saturate, HalfType::F32,
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
case OpCode::Id::HFMA2_RC:
neg_b = instr.hfma2.negate_b;
neg_c = instr.hfma2.negate_c;
return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
instr.hfma2.type_b,
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
case OpCode::Id::HFMA2_RR:
neg_b = instr.hfma2.rr.negate_b;
neg_c = instr.hfma2.rr.negate_c;
@@ -56,13 +55,13 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
return {false, identity, Immediate(0), identity, Immediate(0)};
}
}();
UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");
const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);
Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
value = GetSaturatedHalfFloat(value, saturate);
value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
SetRegister(bb, instr.gpr0, value);
@@ -70,4 +69,4 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -540,8 +540,6 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
bool is_array, bool is_aoffi) {
const std::size_t coord_count = GetCoordCount(texture_type);
const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
// If enabled arrays index is always stored in the gpr8 field
const u64 array_register = instr.gpr8.Value();

View File

@@ -56,9 +56,10 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
instr.xmad.mode,
Immediate(static_cast<u32>(instr.xmad.imm20_16)),
GetRegister(instr.gpr39)};
default:
UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
}
UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
}();
op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16);

View File

@@ -439,11 +439,14 @@ Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
return OperationCode::LogicalUGreaterEqual;
case OperationCode::INegate:
UNREACHABLE_MSG("Can't negate an unsigned integer");
return {};
case OperationCode::IAbsolute:
UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
return {};
default:
UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
return {};
}
UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
return {};
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -178,39 +178,44 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
return PixelFormat::ABGR8S;
case Tegra::Texture::ComponentType::UINT:
return PixelFormat::ABGR8UI;
default:
break;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
UNREACHABLE();
break;
case Tegra::Texture::TextureFormat::B5G6R5:
switch (component_type) {
case Tegra::Texture::ComponentType::UNORM:
return PixelFormat::B5G6R5U;
default:
break;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
UNREACHABLE();
break;
case Tegra::Texture::TextureFormat::A2B10G10R10:
switch (component_type) {
case Tegra::Texture::ComponentType::UNORM:
return PixelFormat::A2B10G10R10U;
default:
break;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
UNREACHABLE();
break;
case Tegra::Texture::TextureFormat::A1B5G5R5:
switch (component_type) {
case Tegra::Texture::ComponentType::UNORM:
return PixelFormat::A1B5G5R5U;
default:
break;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
UNREACHABLE();
break;
case Tegra::Texture::TextureFormat::R8:
switch (component_type) {
case Tegra::Texture::ComponentType::UNORM:
return PixelFormat::R8U;
case Tegra::Texture::ComponentType::UINT:
return PixelFormat::R8UI;
default:
break;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
UNREACHABLE();
break;
case Tegra::Texture::TextureFormat::G8R8:
// TextureFormat::G8R8 is actually ordered red then green, as such we can use
// PixelFormat::RG8U and PixelFormat::RG8S. This was tested with The Legend of Zelda: Breath
@@ -220,50 +225,55 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
return PixelFormat::RG8U;
case Tegra::Texture::ComponentType::SNORM:
return PixelFormat::RG8S;
default:
break;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
UNREACHABLE();
break;
case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
switch (component_type) {
case Tegra::Texture::ComponentType::UNORM:
return PixelFormat::RGBA16U;
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::RGBA16F;
default:
break;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
UNREACHABLE();
break;
case Tegra::Texture::TextureFormat::BF10GF11RF11:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::R11FG11FB10F;
default:
break;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
UNREACHABLE();
case Tegra::Texture::TextureFormat::R32_G32_B32_A32:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::RGBA32F;
case Tegra::Texture::ComponentType::UINT:
return PixelFormat::RGBA32UI;
default:
break;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
UNREACHABLE();
break;
case Tegra::Texture::TextureFormat::R32_G32:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::RG32F;
case Tegra::Texture::ComponentType::UINT:
return PixelFormat::RG32UI;
default:
break;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
UNREACHABLE();
break;
case Tegra::Texture::TextureFormat::R32_G32_B32:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::RGB32F;
default:
break;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
UNREACHABLE();
break;
case Tegra::Texture::TextureFormat::R16:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
@@ -276,18 +286,20 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
return PixelFormat::R16UI;
case Tegra::Texture::ComponentType::SINT:
return PixelFormat::R16I;
default:
break;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
UNREACHABLE();
break;
case Tegra::Texture::TextureFormat::R32:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
return PixelFormat::R32F;
case Tegra::Texture::ComponentType::UINT:
return PixelFormat::R32UI;
default:
break;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
UNREACHABLE();
break;
case Tegra::Texture::TextureFormat::ZF32:
return PixelFormat::Z32F;
case Tegra::Texture::TextureFormat::Z16:
@@ -310,9 +322,10 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
return PixelFormat::DXN2UNORM;
case Tegra::Texture::ComponentType::SNORM:
return PixelFormat::DXN2SNORM;
default:
break;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
UNREACHABLE();
break;
case Tegra::Texture::TextureFormat::BC7U:
return is_srgb ? PixelFormat::BC7U_SRGB : PixelFormat::BC7U;
case Tegra::Texture::TextureFormat::BC6H_UF16:
@@ -343,15 +356,17 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
return PixelFormat::RG16UI;
case Tegra::Texture::ComponentType::SINT:
return PixelFormat::RG16I;
default:
break;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
UNREACHABLE();
break;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}", static_cast<u32>(format),
static_cast<u32>(component_type));
UNREACHABLE();
return PixelFormat::ABGR8U;
break;
}
LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}", static_cast<u32>(format),
static_cast<u32>(component_type));
UNREACHABLE();
return PixelFormat::ABGR8U;
}
ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {
@@ -513,8 +528,9 @@ bool IsFormatBCn(PixelFormat format) {
case PixelFormat::DXT45_SRGB:
case PixelFormat::BC7U_SRGB:
return true;
default:
return false;
}
return false;
}
} // namespace VideoCore::Surface

View File

@@ -25,8 +25,8 @@
class InputBitStream {
public:
explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0)
: m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
explicit InputBitStream(const unsigned char* ptr, int start_offset = 0)
: m_CurByte(ptr), m_NextBit(start_offset % 8) {}
~InputBitStream() = default;
@@ -55,12 +55,9 @@ public:
}
private:
const int m_NumBits;
const unsigned char* m_CurByte;
int m_NextBit = 0;
int m_BitsRead = 0;
bool done = false;
};
class OutputBitStream {
@@ -114,7 +111,6 @@ private:
const int m_NumBits;
unsigned char* m_CurByte;
int m_NextBit = 0;
int m_BitsRead = 0;
bool done = false;
};
@@ -1616,6 +1612,7 @@ namespace Tegra::Texture::ASTC {
std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
uint32_t depth, uint32_t block_width, uint32_t block_height) {
uint32_t blockIdx = 0;
std::size_t depth_offset = 0;
std::vector<uint8_t> outData(height * width * depth * 4);
for (uint32_t k = 0; k < depth; k++) {
for (uint32_t j = 0; j < height; j += block_height) {
@@ -1630,7 +1627,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he
uint32_t decompWidth = std::min(block_width, width - i);
uint32_t decompHeight = std::min(block_height, height - j);
uint8_t* outRow = outData.data() + (j * width + i) * 4;
uint8_t* outRow = depth_offset + outData.data() + (j * width + i) * 4;
for (uint32_t jj = 0; jj < decompHeight; jj++) {
memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
}
@@ -1638,6 +1635,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he
blockIdx++;
}
}
depth_offset += height * width * 4;
}
return outData;

View File

@@ -58,7 +58,7 @@ void CompatDB::Submit() {
button(NextButton)->setEnabled(false);
button(NextButton)->setText(tr("Submitting"));
button(QWizard::CancelButton)->setVisible(false);
button(CancelButton)->setVisible(false);
testcase_watcher.setFuture(QtConcurrent::run(
[] { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); }));
@@ -74,12 +74,12 @@ void CompatDB::OnTestcaseSubmitted() {
tr("An error occured while sending the Testcase"));
button(NextButton)->setEnabled(true);
button(NextButton)->setText(tr("Next"));
button(QWizard::CancelButton)->setVisible(true);
button(CancelButton)->setVisible(true);
} else {
next();
// older versions of QT don't support the "NoCancelButtonOnLastPage" option, this is a
// workaround
button(QWizard::CancelButton)->setVisible(false);
button(CancelButton)->setVisible(false);
}
}

View File

@@ -17,8 +17,12 @@ ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry)
ui->hotkeysTab->Populate(registry);
this->setConfiguration();
this->PopulateSelectionList();
setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint);
connect(ui->selectorList, &QListWidget::itemSelectionChanged, this,
&ConfigureDialog::UpdateVisibleTabs);
adjustSize();
ui->selectorList->setCurrentRow(0);

View File

@@ -69,16 +69,20 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent)
ConfigureGraphics::~ConfigureGraphics() = default;
void ConfigureGraphics::setConfiguration() {
const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
ui->resolution_factor_combobox->setCurrentIndex(
static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit);
ui->frame_limit->setValue(Settings::values.frame_limit);
ui->use_compatibility_profile->setEnabled(runtime_lock);
ui->use_compatibility_profile->setChecked(Settings::values.use_compatibility_profile);
ui->use_disk_shader_cache->setEnabled(runtime_lock);
ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn());
ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock);
ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation);
ui->force_30fps_mode->setEnabled(!Core::System::GetInstance().IsPoweredOn());
ui->force_30fps_mode->setEnabled(runtime_lock);
ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode);
UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
Settings::values.bg_blue));

View File

@@ -67,8 +67,6 @@ public:
private:
struct Hotkey {
Hotkey() : shortcut(nullptr), context(Qt::WindowShortcut) {}
QKeySequence keyseq;
QShortcut* shortcut = nullptr;
Qt::ShortcutContext context = Qt::WindowShortcut;

View File

@@ -176,9 +176,13 @@ EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {
SDL_SetMainReady();
const SDL_GLprofile profile = Settings::values.use_compatibility_profile
? SDL_GL_CONTEXT_PROFILE_COMPATIBILITY
: SDL_GL_CONTEXT_PROFILE_CORE;
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, profile);
SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
SDL_GL_SetAttribute(SDL_GL_RED_SIZE, 8);
SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE, 8);

View File

@@ -222,6 +222,7 @@ int main(int argc, char** argv) {
system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL");
emu_window->MakeCurrent();
system.Renderer().Rasterizer().LoadDiskResources();
while (emu_window->IsOpen()) {