Compare commits

...

120 Commits

Author SHA1 Message Date
yuzubot
e7d6207a61 "Merge Tagged PR 1012" 2019-11-03 10:19:13 +00:00
yuzubot
2b0d48d67c "Merge Tagged PR 1340" 2019-11-03 10:19:11 +00:00
yuzubot
7d2fa09e77 "Merge Tagged PR 1703" 2019-11-03 10:19:10 +00:00
yuzubot
f1b371016d "Merge Tagged PR 2365" 2019-11-03 10:19:09 +00:00
yuzubot
ce49a47e88 "Merge Tagged PR 2542" 2019-11-03 10:19:09 +00:00
yuzubot
cf63652688 "Merge Tagged PR 2859" 2019-11-03 10:19:08 +00:00
yuzubot
2c67c9ac82 "Merge Tagged PR 2914" 2019-11-03 10:19:07 +00:00
yuzubot
026f0a0e4d "Merge Tagged PR 2945" 2019-11-03 10:19:06 +00:00
yuzubot
269e21aa3e "Merge Tagged PR 2987" 2019-11-03 10:19:05 +00:00
yuzubot
e233b2d6a8 "Merge Tagged PR 3047" 2019-11-03 10:19:04 +00:00
yuzubot
12ed1a05ab "Merge Tagged PR 3057" 2019-11-03 10:19:04 +00:00
yuzubot
72e0907574 "Merge Tagged PR 3062" 2019-11-03 10:19:03 +00:00
Mat M
2b4208254e Merge pull request #3060 from FearlessTobi/patch-1
common/bit_field: Remove FORCE_INLINE calls
2019-11-03 04:59:06 -05:00
bunnei
84887b0088 Merge pull request #3064 from yuzu-emu/revert-3063-zero-init-padding
Revert "common_func: Use std::array for INSERT_PADDING_* macros."
2019-11-03 04:40:04 -05:00
bunnei
4edf73344f Revert "common_func: Use std::array for INSERT_PADDING_* macros." 2019-11-03 04:39:51 -05:00
bunnei
8c1e38f744 Merge pull request #3063 from bunnei/zero-init-padding
common_func: Use std::array for INSERT_PADDING_* macros.
2019-11-03 04:35:12 -05:00
bunnei
fdc5791b08 common_func: Use std::array for INSERT_PADDING_* macros.
- Zero initialization here is useful for determinism.
2019-11-03 04:30:16 -05:00
Tobias
a81987a7cb common/bit_field: Remove FORCE_INLINE calls
See bunneis comment here https://github.com/citra-emu/citra/pull/4629#discussion_r258533167.
They were supposed to be removed by him, but he missed them.
2019-11-03 08:25:37 +01:00
bunnei
bec7e3b7d9 Merge pull request #3058 from FearlessTobi/port-4948
Port citra-emu/citra#4948: "citra_qt: add amiibo drag and drop support"
2019-11-03 01:55:21 -04:00
FearlessTobi
727ba2f2d0 citra_qt: add amiibo drag and drop support
Co-Authored-By: Valentin Vanelslande <vvanelslandedev@gmail.com>
2019-11-03 05:24:47 +01:00
Rodrigo Locatti
11e39da02b Merge pull request #3054 from FernandoS27/fix-tld4-2
shader_ir: Fix regression on TLD4
2019-10-31 01:56:29 +00:00
Fernando Sahmkow
23cabc98db Shader_IR: Fix regression on TLD4
Originally on the last commit I thought TLD4 acted the same as TLD4S and 
didn't have a mask. It actually does have a component mask. This commit 
corrects that.
2019-10-30 21:14:57 -04:00
Rodrigo Locatti
658489ebf7 Merge pull request #3050 from FernandoS27/fix-tld4
shader_ir: Fix TLD4 and add bindless variant
2019-10-30 18:37:17 +00:00
Fernando Sahmkow
9293c3a0f2 Shader_IR: Fix TLD4 and add Bindless Variant.
This commit fixes an issue where not all 4 results of tld4 were being
written, the color component was defaulted to red, among other things.
It also implements the bindless variant.
2019-10-30 12:02:03 -04:00
Rodrigo Locatti
04b838c857 Merge pull request #3038 from lioncash/docs
kernel/scheduler: Minor changes
2019-10-30 03:47:28 +00:00
bunnei
2382bbe3ac Merge pull request #3046 from ReinUsesLisp/clean-gl-state
gl_state: Miscellaneous clean up
2019-10-29 22:50:04 -04:00
bunnei
b5138f3c35 Merge pull request #3035 from ReinUsesLisp/rasterizer-accelerated
rasterizer_accelerated: Add intermediary for GPU rasterizers
2019-10-29 22:06:41 -04:00
bunnei
a81bd962ab Merge pull request #3007 from DarkLordZach/fsc-regress
savedata_factory: Automatically create certain savedata
2019-10-29 22:05:09 -04:00
Rodrigo Locatti
3d0cde6a75 gl_state: Use std::array::fill instead of std::fill
Co-Authored-By: Mat M. <mathew1800@gmail.com>
2019-10-30 01:30:31 +00:00
ReinUsesLisp
ce20ed8e4e gl_state: Move dirty checks to individual apply calls instead of Apply
This requires removing constness from some methods, but for consistency
it's removed in all methods.
2019-10-29 21:27:25 -03:00
ReinUsesLisp
3c6557c235 gl_state: Remove ApplyDefaultState
OpenGL has defaults values we can trust. Remove these.
2019-10-29 21:27:25 -03:00
ReinUsesLisp
d3651b0b82 gl_state: Change SetDefaultViewports to use default constructor 2019-10-29 21:27:24 -03:00
ReinUsesLisp
c7698d0bc8 gl_state: Minor style changes 2019-10-29 21:27:24 -03:00
ReinUsesLisp
a14d202ac2 gl_state: Remove unused Citra TextureUnits 2019-10-29 21:27:24 -03:00
ReinUsesLisp
28fece8e9b gl_state: Move initializers from constructor to class declaration 2019-10-29 21:27:23 -03:00
Rodrigo Locatti
2ec5b55ee3 Merge pull request #3004 from ReinUsesLisp/maxwell3d-cleanup
maxwell_3d: Remove unused entries
2019-10-29 23:46:33 +00:00
Rodrigo Locatti
9f93ad08a5 Merge pull request #3023 from lioncash/opus
externals: Track upstream opus
2019-10-28 02:45:01 -03:00
Rodrigo Locatti
c5d9589942 Merge pull request #3037 from FernandoS27/new-formats
video_core: Implement texture format E5B9G9R9_SHAREDEXP.
2019-10-28 01:36:58 -03:00
Lioncash
6c8f28813c scheduler: Mark parameter of AskForReselectionOrMarkRedundant() as const
This is only compared against, so it can be made const.
2019-10-27 23:35:50 -04:00
ReinUsesLisp
fa31e5b868 maxwell_3d/kepler_compute: Remove unused arguments in GetTexture 2019-10-28 00:23:42 -03:00
ReinUsesLisp
538ddd220e video_core/textures: Remove unused index entry in FullTextureInfo 2019-10-28 00:14:38 -03:00
ReinUsesLisp
961fe4d19b maxwell_3d: Remove unused method GetStageTextures 2019-10-28 00:14:29 -03:00
Lioncash
f19c1a7cda scheduler: Silence sign conversion warnings 2019-10-27 22:44:52 -04:00
Lioncash
2fb0bbff29 scheduler: Initialize class members directly where applicable
Reduces the overall amount of code.
2019-10-27 22:13:55 -04:00
Lioncash
2dc469ceba scheduler: Amend documentation comments
Adjusts the formatting of a few of the comments an ensures they get
recognized as proper Doxygen comments.
2019-10-27 22:12:32 -04:00
David
4c5731c34f Merge pull request #2971 from FernandoS27/new-scheduler-v2
Kernel: Implement a New Thread Scheduler V2
2019-10-28 10:53:27 +11:00
Fernando Sahmkow
3f9262195b Video_Core: Implement texture format E5B9G9R9_SHAREDEXP.
This commit implements the E5B9G9R9 Texture format into the general 
system and OpenGL backend.
2019-10-27 16:44:09 -04:00
bunnei
6909b2f0f9 Merge pull request #3034 from ReinUsesLisp/w4244-maxwell3d
maxwell_3d: Silence implicit conversion warnings
2019-10-27 15:08:59 -04:00
ReinUsesLisp
3e469cecc1 maxwell_3d: Silence implicit conversion warnings
While we are at it, unify types for dirty reg pointers.
2019-10-27 15:22:17 -03:00
bunnei
7e2494e987 Merge pull request #3033 from ReinUsesLisp/w4244-astc
astc: Silence implicit conversion warnings
2019-10-27 14:09:53 -04:00
ReinUsesLisp
bd2aff3e26 rasterizer_accelerated: Add intermediary for GPU rasterizers
Add an intermediary class that implements common functions across GPU
accelerated rasterizers. This avoids code repetition on different
backends.
2019-10-27 03:40:08 -03:00
ReinUsesLisp
a5aa1bb174 astc: Silence implicit conversion warnings 2019-10-27 03:04:50 -03:00
Rodrigo Locatti
26f3e18c5c Merge pull request #2976 from FernandoS27/cache-fast-brx-rebased
Implement Fast BRX, fix TXQ and addapt the Shader Cache for it
2019-10-26 16:56:13 -03:00
Fernando Sahmkow
be856a38d6 Shader_IR: Address Feedback. 2019-10-26 15:38:30 -04:00
Rodrigo Locatti
a0d79085c4 Merge pull request #3027 from lioncash/lookup
shader_ir: Use std::array with std::pair instead of std::unordered_map
2019-10-26 05:49:15 -03:00
Rodrigo Locatti
d52598173d Merge pull request #3013 from FernandoS27/tld4s-fix
Shader_Ir: Fix TLD4S from using a component mask.
2019-10-25 20:06:26 -03:00
Fernando Sahmkow
e3afd6595a Shader_IR: Clang format 2019-10-25 09:01:32 -04:00
ReinUsesLisp
78f3e8a757 gl_shader_cache: Implement locker variants invalidation 2019-10-25 09:01:32 -04:00
ReinUsesLisp
ec85648af3 gl_shader_disk_cache: Store and load fast BRX 2019-10-25 09:01:31 -04:00
ReinUsesLisp
fa2c297f3e const_buffer_locker: Minor style changes 2019-10-25 09:01:31 -04:00
ReinUsesLisp
7b81ba4d8a gl_shader_decompiler: Move entries to a separate function 2019-10-25 09:01:31 -04:00
Fernando Sahmkow
1244f2d368 Shader_IR: Implement Fast BRX and allow multi-branches in the CFG. 2019-10-25 09:01:31 -04:00
Fernando Sahmkow
a05120ec0b Shader_IR: Correct typo in Consistent method. 2019-10-25 09:01:30 -04:00
Fernando Sahmkow
33fcec3502 Shader_IR: allow lookup of texture samplers within the shader_ir for instructions that don't provide it 2019-10-25 09:01:30 -04:00
Fernando Sahmkow
8909f52166 Shader_IR: Implement Fast BRX and allow multi-branches in the CFG. 2019-10-25 09:01:30 -04:00
Fernando Sahmkow
acd6441134 Shader_Cache: setup connection of ConstBufferLocker 2019-10-25 09:01:29 -04:00
Fernando Sahmkow
1a58f45d76 VideoCore: Unify const buffer accessing along engines and provide ConstBufferLocker class to shaders. 2019-10-25 09:01:29 -04:00
Fernando Sahmkow
2ef696c85a Shader_IR: Implement BRX tracking. 2019-10-25 09:01:29 -04:00
James Rowe
5ee4fb6e12 Merge pull request #3029 from jroweboy/revert
Revert "ci: Add build name to archive root folder"
2019-10-24 12:53:30 -06:00
James Rowe
969f0afa4e Revert "ci: Add build name to archive root folder"
This reverts commit 5e553a6c26.
2019-10-24 12:46:15 -06:00
Rodrigo Locatti
5062728669 Merge pull request #3028 from lioncash/constexpr
shader_bytecode: Make Matcher constexpr capable
2019-10-24 15:10:40 -03:00
Lioncash
7fdf991097 shader_bytecode: Make Matcher constexpr capable
Greatly shrinks the amount of generated code for GetDecodeTable().

Collapses an assembly output of 9000+ lines down to ~3621 with Clang,
and 6513 down to ~2616 with GCC, given it's now allowed to construct all
the entries as a sequence of constant data.
2019-10-24 01:10:10 -04:00
Lioncash
382717172e shader_ir: Use std::array with pair instead of unordered_map
Given the overall size of the maps are very small, we can use arrays of
pairs here instead of always heap allocating a new map every time the
functions are called. Given the small size of the maps, the difference
in container lookups are negligible, especially given the entries are
already sorted.
2019-10-24 00:25:38 -04:00
Rodrigo Locatti
5328d570df Merge pull request #3024 from lioncash/shadow
video_core/shader: Resolve instances of variable shadowing
2019-10-24 00:45:23 -03:00
Lioncash
1f5401c89c video_core/shader: Resolve instances of variable shadowing
Silences a few -Wshadow warnings.
2019-10-23 23:00:31 -04:00
Lioncash
611236c883 externals: Track upstream opus
Tracks upstream opus, allowing the library to be easily updated. While
we're at it, we incorporate the CMakeLists.txt so that we have easy
control over the requirements of the build.
2019-10-23 20:58:54 -04:00
bunnei
012d7f5233 Merge pull request #3022 from DarkLordZach/azure-folder-rename
ci: Add build name to archive root folder
2019-10-23 15:52:37 -04:00
Zach Hilman
5e553a6c26 ci: Add build name to archive root folder 2019-10-23 15:23:43 -04:00
bunnei
6fe89acf0d Merge pull request #2991 from lioncash/npad
hid/npad: Minor cleanup
2019-10-22 19:51:24 -04:00
Zach Hilman
bb207fe27a savedata_factory: Automatically create certain savedata
After further hardware investigation, it appears that some games, perhaps those more lazily coded, will not call EnsureSaveData, meaning that they expect the normal (current) save to be automatically made. Additionally, some games do not create a cache or temporary save before use.
In these 3 specific instances, the save is created automatically for the game if it doesn't exist.
2019-10-22 15:47:38 -04:00
Fernando Sahmkow
c4a0aa9207 Merge pull request #2995 from ReinUsesLisp/ignore-gmem
shader_ir/memory: Ignore global memory when tracking fails
2019-10-22 13:22:43 -04:00
Fernando Sahmkow
7ecf9f7228 Merge pull request #2983 from lioncash/fallthrough
gl_shader_decompiler/vk_shader_decompiler: Resolve implicit fallthrough cases
2019-10-22 13:16:46 -04:00
Fernando Sahmkow
1509d2ffbd Shader_Ir: Fix TLD4S from using a component mask.
TLD4S always outputs 4 values, the previous code checked a component 
mask and omitted those values that weren't part of it. This commit 
corrects that and makes sure all 4 values are set.
2019-10-22 10:59:07 -04:00
ReinUsesLisp
1ea07954fb shader_ir/memory: Ignore global memory when tracking fails
Ignore global memory operations instead of invoking undefined behaviour
when constant buffer tracking fails and we are blasting through asserts,
ignore the operation.

In the case of LDG this means filling the destination registers with
zeroes; for STG this means ignore the instruction as a whole.

The default behaviour is still to abort execution on failure.
2019-10-22 02:49:17 -03:00
Lioncash
8d8e495248 hid/npad: Fix incorrect connection boolean value in ConnectAllDisconnectedControllers()
We should be setting the connection state to true, otherwise we aren't
actually making the controllers connected like the function name
indicates.
2019-10-17 18:19:47 -04:00
Lioncash
d076466f26 hid/npad: Add missing break in default case
While not an issue, it does prevent fallthrough from occurring if
anything is ever added after this case (unlikely to occur, but this
turns a trivial "should not cause issues" into a definite "won't cause
issues).
2019-10-17 18:17:42 -04:00
Lioncash
26c84718c8 hid/npad: Replace std::for_each with ranged for loops
Performs the same behavior, but is built into the core language itself.

No functional change.
2019-10-17 18:16:36 -04:00
Lioncash
e433e99191 hid/npad: Remove redundant non-const variant of IsControllerSupported()
The const qualified variant can also be called in non-const contexts, so
we can remove the non-const variant to eliminate a bit of code
duplication.
2019-10-17 18:11:41 -04:00
Lioncash
a71e8066a1 hid/npad: Move function declarations
Clearly separate these from the variable declarations to make them more
visible.
2019-10-17 18:09:08 -04:00
Lioncash
6947bf8e44 vk_shader_decompiler: Resolve fallthrough within ExprDecompiler's ExprCondCode operator()
This would previously result in NeverExecute and UnusedIndex being
treated as regular predicates.
2019-10-15 19:40:58 -04:00
Lioncash
b42a74ff2c gl_shader_decompiler: Resolve fallthrough within ExprDecompiler's ExprCondCode operator()
This would previously result in NeverExecute and UnusedIndex being
treated as regular predicates.
2019-10-15 19:38:55 -04:00
Fernando Sahmkow
64e652d8cb Kernel Thread: Cleanup THREADPROCESSORID_DONT_UPDATE. 2019-10-15 11:55:30 -04:00
Fernando Sahmkow
e28c7f5217 Kernel: Address Feedback 2 2019-10-15 11:55:28 -04:00
Fernando Sahmkow
a3524879be Kernel: Clang Format 2019-10-15 11:55:27 -04:00
Fernando Sahmkow
c32520ceb7 Kernel: Reverse global accessor removal. 2019-10-15 11:55:26 -04:00
Fernando Sahmkow
3073615dbc Kernel: Address Feedback. 2019-10-15 11:55:25 -04:00
Fernando Sahmkow
25f8606a6d Kernel Scheduler: Make sure the global scheduler shutdowns correctly. 2019-10-15 11:55:24 -04:00
Fernando Sahmkow
b3c1deba49 Kernel_Thread: Eliminate most global accessors. 2019-10-15 11:55:23 -04:00
Fernando Sahmkow
0b72b34d89 KernelSVC: Assert that condition variable address is aligned to 4 bytes. 2019-10-15 11:55:22 -04:00
Fernando Sahmkow
96b1b144af Kernel: Correct Paused scheduling 2019-10-15 11:55:21 -04:00
Fernando Sahmkow
1c6a11ab14 Kernel: Corrections to Wait Objects clearing in which a thread could still be signalled after a timeout or a cancel. 2019-10-15 11:55:20 -04:00
Fernando Sahmkow
27d571c084 Kernel: Correct redundant yields to only advance time forward. 2019-10-15 11:55:20 -04:00
Fernando Sahmkow
7176857177 Kernel: Corrections to ModifyByWaitingCountAndSignalToAddressIfEqual 2019-10-15 11:55:19 -04:00
Fernando Sahmkow
44e09e5f21 Kernel: Correct Results in Condition Variables and Mutexes 2019-10-15 11:55:18 -04:00
Fernando Sahmkow
1ec1e81373 Kernel: Clang Format 2019-10-15 11:55:17 -04:00
Fernando Sahmkow
e05a8c2385 Kernel: Remove global system accessor from WaitObject 2019-10-15 11:55:16 -04:00
Fernando Sahmkow
0cf26cee59 Scheduler: Implement Yield Count and Core migration on Thread Preemption. 2019-10-15 11:55:16 -04:00
Fernando Sahmkow
2d382de6fa Scheduler: Corrections to YieldAndBalanceLoad and Yield bombing protection. 2019-10-15 11:55:15 -04:00
Fernando Sahmkow
b49c0dab87 Kernel: Initial implementation of thread preemption. 2019-10-15 11:55:14 -04:00
Fernando Sahmkow
103f3a2fe5 Scheduler: Add protections for Yield bombing
In case of redundant yields, the scheduler will now idle the core for 
it's timeslice, in order to avoid continuously yielding the same thing 
over and over.
2019-10-15 11:55:13 -04:00
Fernando Sahmkow
82218c925a Kernel: Style and Corrections 2019-10-15 11:55:12 -04:00
Fernando Sahmkow
fcc6b34fff Correct PrepareReschedule 2019-10-15 11:55:12 -04:00
Fernando Sahmkow
3a94e7ea33 Comment and reorganize the scheduler 2019-10-15 11:55:11 -04:00
Fernando Sahmkow
b5d1e44782 Add PrepareReschedule where required. 2019-10-15 11:55:10 -04:00
Fernando Sahmkow
b8b7ebcece Correct compiling errors and addapt to the new interface. 2019-10-15 11:55:09 -04:00
Fernando Sahmkow
9031502974 Correct Supervisor Calls to work with the new scheduler, 2019-10-15 11:55:08 -04:00
Fernando Sahmkow
47c6c78c03 Redesign CPU Cores to work with the new scheduler 2019-10-15 11:55:07 -04:00
Fernando Sahmkow
57a71f899a Add interfacing to the Global Scheduler 2019-10-15 11:55:07 -04:00
Fernando Sahmkow
a1ac0c6cb4 Addapt thread class to the new Scheduler 2019-10-15 11:55:06 -04:00
Fernando Sahmkow
b164d8ee53 Implement a new Core Scheduler 2019-10-15 11:55:04 -04:00
130 changed files with 4452 additions and 2306 deletions

8
.gitmodules vendored
View File

@@ -26,11 +26,11 @@
path = externals/mbedtls
url = https://github.com/DarkLordZach/mbedtls
[submodule "opus"]
path = externals/opus
url = https://github.com/ogniK5377/opus.git
path = externals/opus/opus
url = https://github.com/xiph/opus.git
[submodule "soundtouch"]
path = externals/soundtouch
url = https://github.com/citra-emu/ext-soundtouch.git
path = externals/soundtouch
url = https://github.com/citra-emu/ext-soundtouch.git
[submodule "libressl"]
path = externals/libressl
url = https://github.com/citra-emu/ext-libressl-portable.git

View File

@@ -85,10 +85,12 @@ set(HASH_FILES
"${VIDEO_CORE}/shader/decode/xmad.cpp"
"${VIDEO_CORE}/shader/ast.cpp"
"${VIDEO_CORE}/shader/ast.h"
"${VIDEO_CORE}/shader/control_flow.cpp"
"${VIDEO_CORE}/shader/control_flow.h"
"${VIDEO_CORE}/shader/compiler_settings.cpp"
"${VIDEO_CORE}/shader/compiler_settings.h"
"${VIDEO_CORE}/shader/const_buffer_locker.cpp"
"${VIDEO_CORE}/shader/const_buffer_locker.h"
"${VIDEO_CORE}/shader/control_flow.cpp"
"${VIDEO_CORE}/shader/control_flow.h"
"${VIDEO_CORE}/shader/decode.cpp"
"${VIDEO_CORE}/shader/expr.cpp"
"${VIDEO_CORE}/shader/expr.h"

1
externals/opus vendored

Submodule externals/opus deleted from 562f8ba555

250
externals/opus/CMakeLists.txt vendored Normal file
View File

@@ -0,0 +1,250 @@
cmake_minimum_required(VERSION 3.8)
project(opus)
option(OPUS_STACK_PROTECTOR "Use stack protection" OFF)
option(OPUS_USE_ALLOCA "Use alloca for stack arrays (on non-C99 compilers)" OFF)
option(OPUS_CUSTOM_MODES "Enable non-Opus modes, e.g. 44.1 kHz & 2^n frames" OFF)
option(OPUS_FIXED_POINT "Compile as fixed-point (for machines without a fast enough FPU)" OFF)
option(OPUS_ENABLE_FLOAT_API "Compile with the floating point API (for machines with float library" ON)
include(opus/opus_functions.cmake)
if(OPUS_STACK_PROTECTOR)
if(NOT MSVC) # GC on by default on MSVC
check_and_set_flag(STACK_PROTECTION_STRONG -fstack-protector-strong)
endif()
else()
if(MSVC)
check_and_set_flag(BUFFER_SECURITY_CHECK /GS-)
endif()
endif()
add_library(opus STATIC
# CELT sources
opus/celt/bands.c
opus/celt/celt.c
opus/celt/celt_decoder.c
opus/celt/celt_encoder.c
opus/celt/celt_lpc.c
opus/celt/cwrs.c
opus/celt/entcode.c
opus/celt/entdec.c
opus/celt/entenc.c
opus/celt/kiss_fft.c
opus/celt/laplace.c
opus/celt/mathops.c
opus/celt/mdct.c
opus/celt/modes.c
opus/celt/pitch.c
opus/celt/quant_bands.c
opus/celt/rate.c
opus/celt/vq.c
# SILK sources
opus/silk/A2NLSF.c
opus/silk/CNG.c
opus/silk/HP_variable_cutoff.c
opus/silk/LPC_analysis_filter.c
opus/silk/LPC_fit.c
opus/silk/LPC_inv_pred_gain.c
opus/silk/LP_variable_cutoff.c
opus/silk/NLSF2A.c
opus/silk/NLSF_VQ.c
opus/silk/NLSF_VQ_weights_laroia.c
opus/silk/NLSF_decode.c
opus/silk/NLSF_del_dec_quant.c
opus/silk/NLSF_encode.c
opus/silk/NLSF_stabilize.c
opus/silk/NLSF_unpack.c
opus/silk/NSQ.c
opus/silk/NSQ_del_dec.c
opus/silk/PLC.c
opus/silk/VAD.c
opus/silk/VQ_WMat_EC.c
opus/silk/ana_filt_bank_1.c
opus/silk/biquad_alt.c
opus/silk/bwexpander.c
opus/silk/bwexpander_32.c
opus/silk/check_control_input.c
opus/silk/code_signs.c
opus/silk/control_SNR.c
opus/silk/control_audio_bandwidth.c
opus/silk/control_codec.c
opus/silk/dec_API.c
opus/silk/decode_core.c
opus/silk/decode_frame.c
opus/silk/decode_indices.c
opus/silk/decode_parameters.c
opus/silk/decode_pitch.c
opus/silk/decode_pulses.c
opus/silk/decoder_set_fs.c
opus/silk/enc_API.c
opus/silk/encode_indices.c
opus/silk/encode_pulses.c
opus/silk/gain_quant.c
opus/silk/init_decoder.c
opus/silk/init_encoder.c
opus/silk/inner_prod_aligned.c
opus/silk/interpolate.c
opus/silk/lin2log.c
opus/silk/log2lin.c
opus/silk/pitch_est_tables.c
opus/silk/process_NLSFs.c
opus/silk/quant_LTP_gains.c
opus/silk/resampler.c
opus/silk/resampler_down2.c
opus/silk/resampler_down2_3.c
opus/silk/resampler_private_AR2.c
opus/silk/resampler_private_IIR_FIR.c
opus/silk/resampler_private_down_FIR.c
opus/silk/resampler_private_up2_HQ.c
opus/silk/resampler_rom.c
opus/silk/shell_coder.c
opus/silk/sigm_Q15.c
opus/silk/sort.c
opus/silk/stereo_LR_to_MS.c
opus/silk/stereo_MS_to_LR.c
opus/silk/stereo_decode_pred.c
opus/silk/stereo_encode_pred.c
opus/silk/stereo_find_predictor.c
opus/silk/stereo_quant_pred.c
opus/silk/sum_sqr_shift.c
opus/silk/table_LSF_cos.c
opus/silk/tables_LTP.c
opus/silk/tables_NLSF_CB_NB_MB.c
opus/silk/tables_NLSF_CB_WB.c
opus/silk/tables_gain.c
opus/silk/tables_other.c
opus/silk/tables_pitch_lag.c
opus/silk/tables_pulses_per_block.c
# Opus sources
opus/src/analysis.c
opus/src/mapping_matrix.c
opus/src/mlp.c
opus/src/mlp_data.c
opus/src/opus.c
opus/src/opus_decoder.c
opus/src/opus_encoder.c
opus/src/opus_multistream.c
opus/src/opus_multistream_decoder.c
opus/src/opus_multistream_encoder.c
opus/src/opus_projection_decoder.c
opus/src/opus_projection_encoder.c
opus/src/repacketizer.c
)
if (DEBUG)
target_sources(opus PRIVATE opus/silk/debug.c)
endif()
if (OPUS_FIXED_POINT)
target_sources(opus PRIVATE
opus/silk/fixed/LTP_analysis_filter_FIX.c
opus/silk/fixed/LTP_scale_ctrl_FIX.c
opus/silk/fixed/apply_sine_window_FIX.c
opus/silk/fixed/autocorr_FIX.c
opus/silk/fixed/burg_modified_FIX.c
opus/silk/fixed/corrMatrix_FIX.c
opus/silk/fixed/encode_frame_FIX.c
opus/silk/fixed/find_LPC_FIX.c
opus/silk/fixed/find_LTP_FIX.c
opus/silk/fixed/find_pitch_lags_FIX.c
opus/silk/fixed/find_pred_coefs_FIX.c
opus/silk/fixed/k2a_FIX.c
opus/silk/fixed/k2a_Q16_FIX.c
opus/silk/fixed/noise_shape_analysis_FIX.c
opus/silk/fixed/pitch_analysis_core_FIX.c
opus/silk/fixed/prefilter_FIX.c
opus/silk/fixed/process_gains_FIX.c
opus/silk/fixed/regularize_correlations_FIX.c
opus/silk/fixed/residual_energy16_FIX.c
opus/silk/fixed/residual_energy_FIX.c
opus/silk/fixed/schur64_FIX.c
opus/silk/fixed/schur_FIX.c
opus/silk/fixed/solve_LS_FIX.c
opus/silk/fixed/vector_ops_FIX.c
opus/silk/fixed/warped_autocorrelation_FIX.c
)
else()
target_sources(opus PRIVATE
opus/silk/float/LPC_analysis_filter_FLP.c
opus/silk/float/LPC_inv_pred_gain_FLP.c
opus/silk/float/LTP_analysis_filter_FLP.c
opus/silk/float/LTP_scale_ctrl_FLP.c
opus/silk/float/apply_sine_window_FLP.c
opus/silk/float/autocorrelation_FLP.c
opus/silk/float/burg_modified_FLP.c
opus/silk/float/bwexpander_FLP.c
opus/silk/float/corrMatrix_FLP.c
opus/silk/float/encode_frame_FLP.c
opus/silk/float/energy_FLP.c
opus/silk/float/find_LPC_FLP.c
opus/silk/float/find_LTP_FLP.c
opus/silk/float/find_pitch_lags_FLP.c
opus/silk/float/find_pred_coefs_FLP.c
opus/silk/float/inner_product_FLP.c
opus/silk/float/k2a_FLP.c
opus/silk/float/noise_shape_analysis_FLP.c
opus/silk/float/pitch_analysis_core_FLP.c
opus/silk/float/process_gains_FLP.c
opus/silk/float/regularize_correlations_FLP.c
opus/silk/float/residual_energy_FLP.c
opus/silk/float/scale_copy_vector_FLP.c
opus/silk/float/scale_vector_FLP.c
opus/silk/float/schur_FLP.c
opus/silk/float/sort_FLP.c
opus/silk/float/warped_autocorrelation_FLP.c
opus/silk/float/wrappers_FLP.c
)
endif()
target_compile_definitions(opus PRIVATE OPUS_BUILD ENABLE_HARDENING)
if(NOT MSVC)
target_compile_definitions(opus PRIVATE _FORTIFY_SOURCE=2)
endif()
# It is strongly recommended to uncomment one of these VAR_ARRAYS: Use C99
# variable-length arrays for stack allocation USE_ALLOCA: Use alloca() for stack
# allocation If none is defined, then the fallback is a non-threadsafe global
# array
if(OPUS_USE_ALLOCA OR MSVC)
target_compile_definitions(opus PRIVATE USE_ALLOCA)
else()
target_compile_definitions(opus PRIVATE VAR_ARRAYS)
endif()
if(OPUS_CUSTOM_MODES)
target_compile_definitions(opus PRIVATE CUSTOM_MODES)
endif()
if(NOT OPUS_ENABLE_FLOAT_API)
target_compile_definitions(opus PRIVATE DISABLE_FLOAT_API)
endif()
target_compile_definitions(opus
PUBLIC
-DOPUS_VERSION="\\"1.3.1\\""
PRIVATE
# Use C99 intrinsics to speed up float-to-int conversion
HAVE_LRINTF
)
if (FIXED_POINT)
target_compile_definitions(opus PRIVATE -DFIXED_POINT=1 -DDISABLE_FLOAT_API)
endif()
target_include_directories(opus
PUBLIC
opus/include
PRIVATE
opus/celt
opus/silk
opus/silk/fixed
opus/silk/float
opus/src
)

1
externals/opus/opus vendored Submodule

Submodule externals/opus/opus added at ad8fe90db7

View File

@@ -74,10 +74,12 @@ add_custom_command(OUTPUT scm_rev.cpp
"${VIDEO_CORE}/shader/decode/xmad.cpp"
"${VIDEO_CORE}/shader/ast.cpp"
"${VIDEO_CORE}/shader/ast.h"
"${VIDEO_CORE}/shader/control_flow.cpp"
"${VIDEO_CORE}/shader/control_flow.h"
"${VIDEO_CORE}/shader/compiler_settings.cpp"
"${VIDEO_CORE}/shader/compiler_settings.h"
"${VIDEO_CORE}/shader/const_buffer_locker.cpp"
"${VIDEO_CORE}/shader/const_buffer_locker.h"
"${VIDEO_CORE}/shader/control_flow.cpp"
"${VIDEO_CORE}/shader/control_flow.h"
"${VIDEO_CORE}/shader/decode.cpp"
"${VIDEO_CORE}/shader/expr.cpp"
"${VIDEO_CORE}/shader/expr.h"

View File

@@ -28,18 +28,14 @@ __declspec(noinline, noreturn)
}
#define ASSERT(_a_) \
do \
if (!(_a_)) { \
assert_noinline_call([] { LOG_CRITICAL(Debug, "Assertion Failed!"); }); \
} \
while (0)
if (!(_a_)) { \
LOG_CRITICAL(Debug, "Assertion Failed!"); \
}
#define ASSERT_MSG(_a_, ...) \
do \
if (!(_a_)) { \
assert_noinline_call([&] { LOG_CRITICAL(Debug, "Assertion Failed!\n" __VA_ARGS__); }); \
} \
while (0)
if (!(_a_)) { \
LOG_CRITICAL(Debug, "Assertion Failed! " __VA_ARGS__); \
}
#define UNREACHABLE() ASSERT_MSG(false, "Unreachable code!")
#define UNREACHABLE_MSG(...) ASSERT_MSG(false, __VA_ARGS__)

View File

@@ -168,11 +168,11 @@ public:
constexpr BitField(BitField&&) noexcept = default;
constexpr BitField& operator=(BitField&&) noexcept = default;
constexpr FORCE_INLINE operator T() const {
constexpr operator T() const {
return Value();
}
constexpr FORCE_INLINE void Assign(const T& value) {
constexpr void Assign(const T& value) {
storage = (static_cast<StorageType>(storage) & ~mask) | FormatValue(value);
}

View File

@@ -6,6 +6,8 @@
#include <cstddef>
#include <cstring>
#include <utility>
#include <boost/functional/hash.hpp>
#include "common/cityhash.h"
#include "common/common_types.h"
@@ -68,4 +70,13 @@ struct HashableStruct {
}
};
struct PairHash {
template <class T1, class T2>
std::size_t operator()(const std::pair<T1, T2>& pair) const noexcept {
std::size_t seed = std::hash<T1>()(pair.first);
boost::hash_combine(seed, std::hash<T2>()(pair.second));
return seed;
}
};
} // namespace Common

View File

@@ -304,6 +304,13 @@ public:
return levels[priority == Depth ? 63 : priority].back();
}
void clear() {
used_priorities = 0;
for (std::size_t i = 0; i < Depth; i++) {
levels[i].clear();
}
}
private:
using const_list_iterator = typename std::list<T>::const_iterator;

View File

@@ -409,6 +409,12 @@ void System::PrepareReschedule() {
CurrentCpuCore().PrepareReschedule();
}
void System::PrepareReschedule(const u32 core_index) {
if (core_index < GlobalScheduler().CpuCoresCount()) {
CpuCore(core_index).PrepareReschedule();
}
}
PerfStatsResults System::GetAndResetPerfStats() {
return impl->GetAndResetPerfStats();
}
@@ -449,6 +455,16 @@ const Kernel::Scheduler& System::Scheduler(std::size_t core_index) const {
return CpuCore(core_index).Scheduler();
}
/// Gets the global scheduler
Kernel::GlobalScheduler& System::GlobalScheduler() {
return impl->kernel.GlobalScheduler();
}
/// Gets the global scheduler
const Kernel::GlobalScheduler& System::GlobalScheduler() const {
return impl->kernel.GlobalScheduler();
}
Kernel::Process* System::CurrentProcess() {
return impl->kernel.CurrentProcess();
}

View File

@@ -24,6 +24,7 @@ class VfsFilesystem;
} // namespace FileSys
namespace Kernel {
class GlobalScheduler;
class KernelCore;
class Process;
class Scheduler;
@@ -184,6 +185,9 @@ public:
/// Prepare the core emulation for a reschedule
void PrepareReschedule();
/// Prepare the core emulation for a reschedule
void PrepareReschedule(u32 core_index);
/// Gets and resets core performance statistics
PerfStatsResults GetAndResetPerfStats();
@@ -238,6 +242,12 @@ public:
/// Gets the scheduler for the CPU core with the specified index
const Kernel::Scheduler& Scheduler(std::size_t core_index) const;
/// Gets the global scheduler
Kernel::GlobalScheduler& GlobalScheduler();
/// Gets the global scheduler
const Kernel::GlobalScheduler& GlobalScheduler() const;
/// Provides a pointer to the current process
Kernel::Process* CurrentProcess();

View File

@@ -52,7 +52,8 @@ bool CpuBarrier::Rendezvous() {
Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
std::size_t core_index)
: cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} {
: cpu_barrier{cpu_barrier}, global_scheduler{system.GlobalScheduler()},
core_timing{system.CoreTiming()}, core_index{core_index} {
#ifdef ARCHITECTURE_x86_64
arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index);
#else
@@ -60,7 +61,7 @@ Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_ba
LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
#endif
scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface);
scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface, core_index);
}
Cpu::~Cpu() = default;
@@ -81,21 +82,21 @@ void Cpu::RunLoop(bool tight_loop) {
return;
}
Reschedule();
// If we don't have a currently active thread then don't execute instructions,
// instead advance to the next event and try to yield to the next thread
if (Kernel::GetCurrentThread() == nullptr) {
LOG_TRACE(Core, "Core-{} idling", core_index);
core_timing.Idle();
core_timing.Advance();
PrepareReschedule();
} else {
if (tight_loop) {
arm_interface->Run();
} else {
arm_interface->Step();
}
core_timing.Advance();
}
core_timing.Advance();
Reschedule();
}
@@ -106,18 +107,18 @@ void Cpu::SingleStep() {
void Cpu::PrepareReschedule() {
arm_interface->PrepareReschedule();
reschedule_pending = true;
}
void Cpu::Reschedule() {
if (!reschedule_pending) {
return;
}
reschedule_pending = false;
// Lock the global kernel mutex when we manipulate the HLE state
std::lock_guard lock{HLE::g_hle_lock};
scheduler->Reschedule();
std::lock_guard lock(HLE::g_hle_lock);
global_scheduler.SelectThread(core_index);
scheduler->TryDoContextSwitch();
}
void Cpu::Shutdown() {
scheduler->Shutdown();
}
} // namespace Core

View File

@@ -12,8 +12,9 @@
#include "common/common_types.h"
namespace Kernel {
class GlobalScheduler;
class Scheduler;
}
} // namespace Kernel
namespace Core {
class System;
@@ -83,6 +84,8 @@ public:
return core_index;
}
void Shutdown();
static std::unique_ptr<ExclusiveMonitor> MakeExclusiveMonitor(std::size_t num_cores);
private:
@@ -90,6 +93,7 @@ private:
std::unique_ptr<ARM_Interface> arm_interface;
CpuBarrier& cpu_barrier;
Kernel::GlobalScheduler& global_scheduler;
std::unique_ptr<Kernel::Scheduler> scheduler;
Timing::CoreTiming& core_timing;

View File

@@ -58,6 +58,7 @@ void CpuCoreManager::Shutdown() {
thread_to_cpu.clear();
for (auto& cpu_core : cores) {
cpu_core->Shutdown();
cpu_core.reset();
}

View File

@@ -16,6 +16,7 @@ namespace FileSys {
constexpr char SAVE_DATA_SIZE_FILENAME[] = ".yuzu_save_size";
namespace {
void PrintSaveDataDescriptorWarnings(SaveDataDescriptor meta) {
if (meta.type == SaveDataType::SystemSaveData || meta.type == SaveDataType::SaveData) {
if (meta.zero_1 != 0) {
@@ -52,6 +53,13 @@ void PrintSaveDataDescriptorWarnings(SaveDataDescriptor meta) {
meta.user_id[1], meta.user_id[0]);
}
}
bool ShouldSaveDataBeAutomaticallyCreated(SaveDataSpaceId space, const SaveDataDescriptor& desc) {
return desc.type == SaveDataType::CacheStorage || desc.type == SaveDataType::TemporaryStorage ||
(space == SaveDataSpaceId::NandUser && ///< Normal Save Data -- Current Title & User
desc.type == SaveDataType::SaveData && desc.title_id == 0 && desc.save_id == 0);
}
} // Anonymous namespace
std::string SaveDataDescriptor::DebugInfo() const {
@@ -96,6 +104,10 @@ ResultVal<VirtualDir> SaveDataFactory::Open(SaveDataSpaceId space,
auto out = dir->GetDirectoryRelative(save_directory);
if (out == nullptr && ShouldSaveDataBeAutomaticallyCreated(space, meta)) {
return Create(space, meta);
}
// Return an error if the save data doesn't actually exist.
if (out == nullptr) {
// TODO(Subv): Find out correct error code.

View File

@@ -202,13 +202,11 @@ void RegisterModule(std::string name, VAddr beg, VAddr end, bool add_elf_ext) {
}
static Kernel::Thread* FindThreadById(s64 id) {
for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) {
const auto& threads = Core::System::GetInstance().Scheduler(core).GetThreadList();
for (auto& thread : threads) {
if (thread->GetThreadID() == static_cast<u64>(id)) {
current_core = core;
return thread.get();
}
const auto& threads = Core::System::GetInstance().GlobalScheduler().GetThreadList();
for (auto& thread : threads) {
if (thread->GetThreadID() == static_cast<u64>(id)) {
current_core = thread->GetProcessorID();
return thread.get();
}
}
return nullptr;
@@ -647,11 +645,9 @@ static void HandleQuery() {
SendReply(buffer.c_str());
} else if (strncmp(query, "fThreadInfo", strlen("fThreadInfo")) == 0) {
std::string val = "m";
for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) {
const auto& threads = Core::System::GetInstance().Scheduler(core).GetThreadList();
for (const auto& thread : threads) {
val += fmt::format("{:x},", thread->GetThreadID());
}
const auto& threads = Core::System::GetInstance().GlobalScheduler().GetThreadList();
for (const auto& thread : threads) {
val += fmt::format("{:x},", thread->GetThreadID());
}
val.pop_back();
SendReply(val.c_str());
@@ -661,13 +657,11 @@ static void HandleQuery() {
std::string buffer;
buffer += "l<?xml version=\"1.0\"?>";
buffer += "<threads>";
for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) {
const auto& threads = Core::System::GetInstance().Scheduler(core).GetThreadList();
for (const auto& thread : threads) {
buffer +=
fmt::format(R"*(<thread id="{:x}" core="{:d}" name="Thread {:x}"></thread>)*",
thread->GetThreadID(), core, thread->GetThreadID());
}
const auto& threads = Core::System::GetInstance().GlobalScheduler().GetThreadList();
for (const auto& thread : threads) {
buffer +=
fmt::format(R"*(<thread id="{:x}" core="{:d}" name="Thread {:x}"></thread>)*",
thread->GetThreadID(), thread->GetProcessorID(), thread->GetThreadID());
}
buffer += "</threads>";
SendReply(buffer.c_str());

View File

@@ -22,6 +22,7 @@ namespace Kernel {
namespace {
// Wake up num_to_wake (or all) threads in a vector.
void WakeThreads(const std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
auto& system = Core::System::GetInstance();
// Only process up to 'target' threads, unless 'target' is <= 0, in which case process
// them all.
std::size_t last = waiting_threads.size();
@@ -35,6 +36,7 @@ void WakeThreads(const std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_
waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
waiting_threads[i]->SetArbiterWaitAddress(0);
waiting_threads[i]->ResumeFromWait();
system.PrepareReschedule(waiting_threads[i]->GetProcessorID());
}
}
} // Anonymous namespace
@@ -89,12 +91,20 @@ ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr a
// Determine the modified value depending on the waiting count.
s32 updated_value;
if (waiting_threads.empty()) {
updated_value = value + 1;
} else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
updated_value = value - 1;
if (num_to_wake <= 0) {
if (waiting_threads.empty()) {
updated_value = value + 1;
} else {
updated_value = value - 1;
}
} else {
updated_value = value;
if (waiting_threads.empty()) {
updated_value = value + 1;
} else if (waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
updated_value = value - 1;
} else {
updated_value = value;
}
}
if (static_cast<s32>(Memory::Read32(address)) != value) {
@@ -169,30 +179,22 @@ ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
current_thread->WakeAfterDelay(timeout);
system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
system.PrepareReschedule(current_thread->GetProcessorID());
return RESULT_TIMEOUT;
}
std::vector<SharedPtr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) const {
const auto RetrieveWaitingThreads = [this](std::size_t core_index,
std::vector<SharedPtr<Thread>>& waiting_threads,
VAddr arb_addr) {
const auto& scheduler = system.Scheduler(core_index);
const auto& thread_list = scheduler.GetThreadList();
for (const auto& thread : thread_list) {
if (thread->GetArbiterWaitAddress() == arb_addr) {
waiting_threads.push_back(thread);
}
}
};
// Retrieve all threads that are waiting for this address.
std::vector<SharedPtr<Thread>> threads;
RetrieveWaitingThreads(0, threads, address);
RetrieveWaitingThreads(1, threads, address);
RetrieveWaitingThreads(2, threads, address);
RetrieveWaitingThreads(3, threads, address);
const auto& scheduler = system.GlobalScheduler();
const auto& thread_list = scheduler.GetThreadList();
for (const auto& thread : thread_list) {
if (thread->GetArbiterWaitAddress() == address) {
threads.push_back(thread);
}
}
// Sort them by priority, such that the highest priority ones come first.
std::sort(threads.begin(), threads.end(),

View File

@@ -58,8 +58,7 @@ SharedPtr<WritableEvent> HLERequestContext::SleepClientThread(
auto& kernel = Core::System::GetInstance().Kernel();
if (!writable_event) {
// Create event if not provided
const auto pair = WritableEvent::CreateEventPair(kernel, ResetType::Automatic,
"HLE Pause Event: " + reason);
const auto pair = WritableEvent::CreateEventPair(kernel, "HLE Pause Event: " + reason);
writable_event = pair.writable;
}

View File

@@ -12,12 +12,15 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/hle/kernel/address_arbiter.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/lock.h"
#include "core/hle/result.h"
@@ -58,12 +61,8 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
if (thread->HasWakeupCallback()) {
resume = thread->InvokeWakeupCallback(ThreadWakeupReason::Timeout, thread, nullptr, 0);
}
}
if (thread->GetMutexWaitAddress() != 0 || thread->GetCondVarWaitAddress() != 0 ||
thread->GetWaitHandle() != 0) {
ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex ||
thread->GetStatus() == ThreadStatus::WaitCondVar);
} else if (thread->GetStatus() == ThreadStatus::WaitMutex ||
thread->GetStatus() == ThreadStatus::WaitCondVar) {
thread->SetMutexWaitAddress(0);
thread->SetCondVarWaitAddress(0);
thread->SetWaitHandle(0);
@@ -83,18 +82,23 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
}
if (resume) {
if (thread->GetStatus() == ThreadStatus::WaitCondVar ||
thread->GetStatus() == ThreadStatus::WaitArb) {
thread->SetWaitSynchronizationResult(RESULT_TIMEOUT);
}
thread->ResumeFromWait();
}
}
struct KernelCore::Impl {
explicit Impl(Core::System& system) : system{system} {}
explicit Impl(Core::System& system) : system{system}, global_scheduler{system} {}
void Initialize(KernelCore& kernel) {
Shutdown();
InitializeSystemResourceLimit(kernel);
InitializeThreads();
InitializePreemption();
}
void Shutdown() {
@@ -110,6 +114,9 @@ struct KernelCore::Impl {
thread_wakeup_callback_handle_table.Clear();
thread_wakeup_event_type = nullptr;
preemption_event = nullptr;
global_scheduler.Shutdown();
named_ports.clear();
}
@@ -132,6 +139,18 @@ struct KernelCore::Impl {
system.CoreTiming().RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
}
void InitializePreemption() {
preemption_event = system.CoreTiming().RegisterEvent(
"PreemptionCallback", [this](u64 userdata, s64 cycles_late) {
global_scheduler.PreemptThreads();
s64 time_interval = Core::Timing::msToCycles(std::chrono::milliseconds(10));
system.CoreTiming().ScheduleEvent(time_interval, preemption_event);
});
s64 time_interval = Core::Timing::msToCycles(std::chrono::milliseconds(10));
system.CoreTiming().ScheduleEvent(time_interval, preemption_event);
}
std::atomic<u32> next_object_id{0};
std::atomic<u64> next_kernel_process_id{Process::InitialKIPIDMin};
std::atomic<u64> next_user_process_id{Process::ProcessIDMin};
@@ -140,10 +159,12 @@ struct KernelCore::Impl {
// Lists all processes that exist in the current session.
std::vector<SharedPtr<Process>> process_list;
Process* current_process = nullptr;
Kernel::GlobalScheduler global_scheduler;
SharedPtr<ResourceLimit> system_resource_limit;
Core::Timing::EventType* thread_wakeup_event_type = nullptr;
Core::Timing::EventType* preemption_event = nullptr;
// TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future,
// allowing us to simply use a pool index or similar.
Kernel::HandleTable thread_wakeup_callback_handle_table;
@@ -203,6 +224,14 @@ const std::vector<SharedPtr<Process>>& KernelCore::GetProcessList() const {
return impl->process_list;
}
Kernel::GlobalScheduler& KernelCore::GlobalScheduler() {
return impl->global_scheduler;
}
const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const {
return impl->global_scheduler;
}
void KernelCore::AddNamedPort(std::string name, SharedPtr<ClientPort> port) {
impl->named_ports.emplace(std::move(name), std::move(port));
}

View File

@@ -21,6 +21,7 @@ namespace Kernel {
class AddressArbiter;
class ClientPort;
class GlobalScheduler;
class HandleTable;
class Process;
class ResourceLimit;
@@ -75,6 +76,12 @@ public:
/// Retrieves the list of processes.
const std::vector<SharedPtr<Process>>& GetProcessList() const;
/// Gets the sole instance of the global scheduler
Kernel::GlobalScheduler& GlobalScheduler();
/// Gets the sole instance of the global scheduler
const Kernel::GlobalScheduler& GlobalScheduler() const;
/// Adds a port to the named port table
void AddNamedPort(std::string name, SharedPtr<ClientPort> port);

View File

@@ -7,6 +7,7 @@
#include "common/assert.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/kernel.h"
@@ -78,7 +79,7 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
// thread.
ASSERT(requesting_thread == current_thread);
const u32 addr_value = Memory::Read32(address);
u32 addr_value = Memory::Read32(address);
// If the mutex isn't being held, just return success.
if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) {
@@ -89,6 +90,20 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
return ERR_INVALID_HANDLE;
}
// This a workaround where an unknown bug writes the mutex value to give ownership to a cond var
// waiting thread.
if (holding_thread->GetStatus() == ThreadStatus::WaitCondVar) {
if (holding_thread->GetMutexWaitAddress() == address) {
Release(address, holding_thread.get());
addr_value = Memory::Read32(address);
if (addr_value == 0)
return RESULT_SUCCESS;
else {
holding_thread = handle_table.Get<Thread>(addr_value & Mutex::MutexOwnerMask);
}
}
}
// Wait until the mutex is released
current_thread->SetMutexWaitAddress(address);
current_thread->SetWaitHandle(requesting_thread_handle);
@@ -104,14 +119,13 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
return RESULT_SUCCESS;
}
ResultCode Mutex::Release(VAddr address) {
ResultCode Mutex::Release(VAddr address, Thread* holding_thread) {
// The mutex address must be 4-byte aligned
if ((address % sizeof(u32)) != 0) {
return ERR_INVALID_ADDRESS;
}
auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(current_thread, address);
auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(holding_thread, address);
// There are no more threads waiting for the mutex, release it completely.
if (thread == nullptr) {
@@ -120,7 +134,7 @@ ResultCode Mutex::Release(VAddr address) {
}
// Transfer the ownership of the mutex from the previous owner to the new one.
TransferMutexOwnership(address, current_thread, thread);
TransferMutexOwnership(address, holding_thread, thread);
u32 mutex_value = thread->GetWaitHandle();
@@ -139,6 +153,12 @@ ResultCode Mutex::Release(VAddr address) {
thread->SetCondVarWaitAddress(0);
thread->SetMutexWaitAddress(0);
thread->SetWaitHandle(0);
thread->SetWaitSynchronizationResult(RESULT_SUCCESS);
if (thread->GetProcessorID() >= 0)
system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
if (holding_thread->GetProcessorID() >= 0)
system.CpuCore(holding_thread->GetProcessorID()).PrepareReschedule();
return RESULT_SUCCESS;
}

View File

@@ -29,7 +29,7 @@ public:
Handle requesting_thread_handle);
/// Releases the mutex at the specified address.
ResultCode Release(VAddr address);
ResultCode Release(VAddr address, Thread* holding_thread);
private:
Core::System& system;

View File

@@ -32,11 +32,6 @@ enum class HandleType : u32 {
ServerSession,
};
enum class ResetType {
Automatic, ///< Reset automatically on object acquisition
Manual, ///< Never reset automatically
};
class Object : NonCopyable {
public:
explicit Object(KernelCore& kernel);

View File

@@ -213,10 +213,7 @@ void Process::PrepareForTermination() {
}
};
stop_threads(system.Scheduler(0).GetThreadList());
stop_threads(system.Scheduler(1).GetThreadList());
stop_threads(system.Scheduler(2).GetThreadList());
stop_threads(system.Scheduler(3).GetThreadList());
stop_threads(system.GlobalScheduler().GetThreadList());
FreeTLSRegion(tls_region_address);
tls_region_address = 0;

View File

@@ -20,15 +20,13 @@ bool ReadableEvent::ShouldWait(const Thread* thread) const {
void ReadableEvent::Acquire(Thread* thread) {
ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
if (reset_type == ResetType::Automatic) {
signaled = false;
}
}
void ReadableEvent::Signal() {
signaled = true;
WakeupAllWaitingThreads();
if (!signaled) {
signaled = true;
WakeupAllWaitingThreads();
};
}
void ReadableEvent::Clear() {

View File

@@ -27,10 +27,6 @@ public:
return name;
}
ResetType GetResetType() const {
return reset_type;
}
static constexpr HandleType HANDLE_TYPE = HandleType::ReadableEvent;
HandleType GetHandleType() const override {
return HANDLE_TYPE;
@@ -55,8 +51,7 @@ private:
void Signal();
ResetType reset_type;
bool signaled;
bool signaled{};
std::string name; ///< Name of event (optional)
};

View File

@@ -1,8 +1,13 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
//
// SelectThreads, Yield functions originally by TuxSH.
// licensed under GPLv2 or later under exception provided by the author.
#include <algorithm>
#include <set>
#include <unordered_set>
#include <utility>
#include "common/assert.h"
@@ -17,56 +22,374 @@
namespace Kernel {
std::mutex Scheduler::scheduler_mutex;
GlobalScheduler::GlobalScheduler(Core::System& system) : system{system} {}
Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core)
: cpu_core{cpu_core}, system{system} {}
GlobalScheduler::~GlobalScheduler() = default;
Scheduler::~Scheduler() {
for (auto& thread : thread_list) {
thread->Stop();
void GlobalScheduler::AddThread(SharedPtr<Thread> thread) {
thread_list.push_back(std::move(thread));
}
void GlobalScheduler::RemoveThread(const Thread* thread) {
thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread),
thread_list.end());
}
void GlobalScheduler::UnloadThread(s32 core) {
Scheduler& sched = system.Scheduler(core);
sched.UnloadThread();
}
void GlobalScheduler::SelectThread(u32 core) {
const auto update_thread = [](Thread* thread, Scheduler& sched) {
if (thread != sched.selected_thread) {
if (thread == nullptr) {
++sched.idle_selection_count;
}
sched.selected_thread = thread;
}
sched.is_context_switch_pending = sched.selected_thread != sched.current_thread;
std::atomic_thread_fence(std::memory_order_seq_cst);
};
Scheduler& sched = system.Scheduler(core);
Thread* current_thread = nullptr;
// Step 1: Get top thread in schedule queue.
current_thread = scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front();
if (current_thread) {
update_thread(current_thread, sched);
return;
}
// Step 2: Try selecting a suggested thread.
Thread* winner = nullptr;
std::set<s32> sug_cores;
for (auto thread : suggested_queue[core]) {
s32 this_core = thread->GetProcessorID();
Thread* thread_on_core = nullptr;
if (this_core >= 0) {
thread_on_core = scheduled_queue[this_core].front();
}
if (this_core < 0 || thread != thread_on_core) {
winner = thread;
break;
}
sug_cores.insert(this_core);
}
// if we got a suggested thread, select it, else do a second pass.
if (winner && winner->GetPriority() > 2) {
if (winner->IsRunning()) {
UnloadThread(winner->GetProcessorID());
}
TransferToCore(winner->GetPriority(), core, winner);
update_thread(winner, sched);
return;
}
// Step 3: Select a suggested thread from another core
for (auto& src_core : sug_cores) {
auto it = scheduled_queue[src_core].begin();
it++;
if (it != scheduled_queue[src_core].end()) {
Thread* thread_on_core = scheduled_queue[src_core].front();
Thread* to_change = *it;
if (thread_on_core->IsRunning() || to_change->IsRunning()) {
UnloadThread(src_core);
}
TransferToCore(thread_on_core->GetPriority(), core, thread_on_core);
current_thread = thread_on_core;
break;
}
}
update_thread(current_thread, sched);
}
bool GlobalScheduler::YieldThread(Thread* yielding_thread) {
// Note: caller should use critical section, etc.
const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
const u32 priority = yielding_thread->GetPriority();
// Yield the thread
const Thread* const winner = scheduled_queue[core_id].front(priority);
ASSERT_MSG(yielding_thread == winner, "Thread yielding without being in front");
scheduled_queue[core_id].yield(priority);
return AskForReselectionOrMarkRedundant(yielding_thread, winner);
}
bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) {
// Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section,
// etc.
const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
const u32 priority = yielding_thread->GetPriority();
// Yield the thread
ASSERT_MSG(yielding_thread == scheduled_queue[core_id].front(priority),
"Thread yielding without being in front");
scheduled_queue[core_id].yield(priority);
std::array<Thread*, NUM_CPU_CORES> current_threads;
for (u32 i = 0; i < NUM_CPU_CORES; i++) {
current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front();
}
Thread* next_thread = scheduled_queue[core_id].front(priority);
Thread* winner = nullptr;
for (auto& thread : suggested_queue[core_id]) {
const s32 source_core = thread->GetProcessorID();
if (source_core >= 0) {
if (current_threads[source_core] != nullptr) {
if (thread == current_threads[source_core] ||
current_threads[source_core]->GetPriority() < min_regular_priority) {
continue;
}
}
}
if (next_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks() ||
next_thread->GetPriority() < thread->GetPriority()) {
if (thread->GetPriority() <= priority) {
winner = thread;
break;
}
}
}
if (winner != nullptr) {
if (winner != yielding_thread) {
if (winner->IsRunning()) {
UnloadThread(winner->GetProcessorID());
}
TransferToCore(winner->GetPriority(), core_id, winner);
}
} else {
winner = next_thread;
}
return AskForReselectionOrMarkRedundant(yielding_thread, winner);
}
bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread) {
// Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section,
// etc.
Thread* winner = nullptr;
const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
// Remove the thread from its scheduled mlq, put it on the corresponding "suggested" one instead
TransferToCore(yielding_thread->GetPriority(), -1, yielding_thread);
// If the core is idle, perform load balancing, excluding the threads that have just used this
// function...
if (scheduled_queue[core_id].empty()) {
// Here, "current_threads" is calculated after the ""yield"", unlike yield -1
std::array<Thread*, NUM_CPU_CORES> current_threads;
for (u32 i = 0; i < NUM_CPU_CORES; i++) {
current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front();
}
for (auto& thread : suggested_queue[core_id]) {
const s32 source_core = thread->GetProcessorID();
if (source_core < 0 || thread == current_threads[source_core]) {
continue;
}
if (current_threads[source_core] == nullptr ||
current_threads[source_core]->GetPriority() >= min_regular_priority) {
winner = thread;
}
break;
}
if (winner != nullptr) {
if (winner != yielding_thread) {
if (winner->IsRunning()) {
UnloadThread(winner->GetProcessorID());
}
TransferToCore(winner->GetPriority(), core_id, winner);
}
} else {
winner = yielding_thread;
}
}
return AskForReselectionOrMarkRedundant(yielding_thread, winner);
}
void GlobalScheduler::PreemptThreads() {
for (std::size_t core_id = 0; core_id < NUM_CPU_CORES; core_id++) {
const u32 priority = preemption_priorities[core_id];
if (scheduled_queue[core_id].size(priority) > 0) {
scheduled_queue[core_id].front(priority)->IncrementYieldCount();
scheduled_queue[core_id].yield(priority);
if (scheduled_queue[core_id].size(priority) > 1) {
scheduled_queue[core_id].front(priority)->IncrementYieldCount();
}
}
Thread* current_thread =
scheduled_queue[core_id].empty() ? nullptr : scheduled_queue[core_id].front();
Thread* winner = nullptr;
for (auto& thread : suggested_queue[core_id]) {
const s32 source_core = thread->GetProcessorID();
if (thread->GetPriority() != priority) {
continue;
}
if (source_core >= 0) {
Thread* next_thread = scheduled_queue[source_core].empty()
? nullptr
: scheduled_queue[source_core].front();
if (next_thread != nullptr && next_thread->GetPriority() < 2) {
break;
}
if (next_thread == thread) {
continue;
}
}
if (current_thread != nullptr &&
current_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks()) {
winner = thread;
break;
}
}
if (winner != nullptr) {
if (winner->IsRunning()) {
UnloadThread(winner->GetProcessorID());
}
TransferToCore(winner->GetPriority(), s32(core_id), winner);
current_thread =
winner->GetPriority() <= current_thread->GetPriority() ? winner : current_thread;
}
if (current_thread != nullptr && current_thread->GetPriority() > priority) {
for (auto& thread : suggested_queue[core_id]) {
const s32 source_core = thread->GetProcessorID();
if (thread->GetPriority() < priority) {
continue;
}
if (source_core >= 0) {
Thread* next_thread = scheduled_queue[source_core].empty()
? nullptr
: scheduled_queue[source_core].front();
if (next_thread != nullptr && next_thread->GetPriority() < 2) {
break;
}
if (next_thread == thread) {
continue;
}
}
if (current_thread != nullptr &&
current_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks()) {
winner = thread;
break;
}
}
if (winner != nullptr) {
if (winner->IsRunning()) {
UnloadThread(winner->GetProcessorID());
}
TransferToCore(winner->GetPriority(), s32(core_id), winner);
current_thread = winner;
}
}
is_reselection_pending.store(true, std::memory_order_release);
}
}
void GlobalScheduler::Suggest(u32 priority, u32 core, Thread* thread) {
suggested_queue[core].add(thread, priority);
}
void GlobalScheduler::Unsuggest(u32 priority, u32 core, Thread* thread) {
suggested_queue[core].remove(thread, priority);
}
void GlobalScheduler::Schedule(u32 priority, u32 core, Thread* thread) {
ASSERT_MSG(thread->GetProcessorID() == s32(core), "Thread must be assigned to this core.");
scheduled_queue[core].add(thread, priority);
}
void GlobalScheduler::SchedulePrepend(u32 priority, u32 core, Thread* thread) {
ASSERT_MSG(thread->GetProcessorID() == s32(core), "Thread must be assigned to this core.");
scheduled_queue[core].add(thread, priority, false);
}
void GlobalScheduler::Reschedule(u32 priority, u32 core, Thread* thread) {
scheduled_queue[core].remove(thread, priority);
scheduled_queue[core].add(thread, priority);
}
void GlobalScheduler::Unschedule(u32 priority, u32 core, Thread* thread) {
scheduled_queue[core].remove(thread, priority);
}
void GlobalScheduler::TransferToCore(u32 priority, s32 destination_core, Thread* thread) {
const bool schedulable = thread->GetPriority() < THREADPRIO_COUNT;
const s32 source_core = thread->GetProcessorID();
if (source_core == destination_core || !schedulable) {
return;
}
thread->SetProcessorID(destination_core);
if (source_core >= 0) {
Unschedule(priority, source_core, thread);
}
if (destination_core >= 0) {
Unsuggest(priority, destination_core, thread);
Schedule(priority, destination_core, thread);
}
if (source_core >= 0) {
Suggest(priority, source_core, thread);
}
}
bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread,
const Thread* winner) {
if (current_thread == winner) {
current_thread->IncrementYieldCount();
return true;
} else {
is_reselection_pending.store(true, std::memory_order_release);
return false;
}
}
void GlobalScheduler::Shutdown() {
for (std::size_t core = 0; core < NUM_CPU_CORES; core++) {
scheduled_queue[core].clear();
suggested_queue[core].clear();
}
thread_list.clear();
}
Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, u32 core_id)
: system(system), cpu_core(cpu_core), core_id(core_id) {}
Scheduler::~Scheduler() = default;
bool Scheduler::HaveReadyThreads() const {
std::lock_guard lock{scheduler_mutex};
return !ready_queue.empty();
return system.GlobalScheduler().HaveReadyThreads(core_id);
}
Thread* Scheduler::GetCurrentThread() const {
return current_thread.get();
}
Thread* Scheduler::GetSelectedThread() const {
return selected_thread.get();
}
void Scheduler::SelectThreads() {
system.GlobalScheduler().SelectThread(core_id);
}
u64 Scheduler::GetLastContextSwitchTicks() const {
return last_context_switch_time;
}
Thread* Scheduler::PopNextReadyThread() {
Thread* next = nullptr;
Thread* thread = GetCurrentThread();
if (thread && thread->GetStatus() == ThreadStatus::Running) {
if (ready_queue.empty()) {
return thread;
}
// We have to do better than the current thread.
// This call returns null when that's not possible.
next = ready_queue.front();
if (next == nullptr || next->GetPriority() >= thread->GetPriority()) {
next = thread;
}
} else {
if (ready_queue.empty()) {
return nullptr;
}
next = ready_queue.front();
void Scheduler::TryDoContextSwitch() {
if (is_context_switch_pending) {
SwitchContext();
}
return next;
}
void Scheduler::SwitchContext(Thread* new_thread) {
Thread* previous_thread = GetCurrentThread();
void Scheduler::UnloadThread() {
Thread* const previous_thread = GetCurrentThread();
Process* const previous_process = system.Kernel().CurrentProcess();
UpdateLastContextSwitchTime(previous_thread, previous_process);
@@ -80,23 +403,52 @@ void Scheduler::SwitchContext(Thread* new_thread) {
if (previous_thread->GetStatus() == ThreadStatus::Running) {
// This is only the case when a reschedule is triggered without the current thread
// yielding execution (i.e. an event triggered, system core time-sliced, etc)
ready_queue.add(previous_thread, previous_thread->GetPriority(), false);
previous_thread->SetStatus(ThreadStatus::Ready);
}
previous_thread->SetIsRunning(false);
}
current_thread = nullptr;
}
void Scheduler::SwitchContext() {
Thread* const previous_thread = GetCurrentThread();
Thread* const new_thread = GetSelectedThread();
is_context_switch_pending = false;
if (new_thread == previous_thread) {
return;
}
Process* const previous_process = system.Kernel().CurrentProcess();
UpdateLastContextSwitchTime(previous_thread, previous_process);
// Save context for previous thread
if (previous_thread) {
cpu_core.SaveContext(previous_thread->GetContext());
// Save the TPIDR_EL0 system register in case it was modified.
previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0());
if (previous_thread->GetStatus() == ThreadStatus::Running) {
// This is only the case when a reschedule is triggered without the current thread
// yielding execution (i.e. an event triggered, system core time-sliced, etc)
previous_thread->SetStatus(ThreadStatus::Ready);
}
previous_thread->SetIsRunning(false);
}
// Load context of new thread
if (new_thread) {
ASSERT_MSG(new_thread->GetProcessorID() == s32(this->core_id),
"Thread must be assigned to this core.");
ASSERT_MSG(new_thread->GetStatus() == ThreadStatus::Ready,
"Thread must be ready to become running.");
// Cancel any outstanding wakeup events for this thread
new_thread->CancelWakeupTimer();
current_thread = new_thread;
ready_queue.remove(new_thread, new_thread->GetPriority());
new_thread->SetStatus(ThreadStatus::Running);
new_thread->SetIsRunning(true);
auto* const thread_owner_process = current_thread->GetOwnerProcess();
if (previous_process != thread_owner_process) {
@@ -130,124 +482,9 @@ void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
last_context_switch_time = most_recent_switch_ticks;
}
void Scheduler::Reschedule() {
std::lock_guard lock{scheduler_mutex};
Thread* cur = GetCurrentThread();
Thread* next = PopNextReadyThread();
if (cur && next) {
LOG_TRACE(Kernel, "context switch {} -> {}", cur->GetObjectId(), next->GetObjectId());
} else if (cur) {
LOG_TRACE(Kernel, "context switch {} -> idle", cur->GetObjectId());
} else if (next) {
LOG_TRACE(Kernel, "context switch idle -> {}", next->GetObjectId());
}
SwitchContext(next);
}
void Scheduler::AddThread(SharedPtr<Thread> thread) {
std::lock_guard lock{scheduler_mutex};
thread_list.push_back(std::move(thread));
}
void Scheduler::RemoveThread(Thread* thread) {
std::lock_guard lock{scheduler_mutex};
thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread),
thread_list.end());
}
void Scheduler::ScheduleThread(Thread* thread, u32 priority) {
std::lock_guard lock{scheduler_mutex};
ASSERT(thread->GetStatus() == ThreadStatus::Ready);
ready_queue.add(thread, priority);
}
void Scheduler::UnscheduleThread(Thread* thread, u32 priority) {
std::lock_guard lock{scheduler_mutex};
ASSERT(thread->GetStatus() == ThreadStatus::Ready);
ready_queue.remove(thread, priority);
}
void Scheduler::SetThreadPriority(Thread* thread, u32 priority) {
std::lock_guard lock{scheduler_mutex};
if (thread->GetPriority() == priority) {
return;
}
// If thread was ready, adjust queues
if (thread->GetStatus() == ThreadStatus::Ready)
ready_queue.adjust(thread, thread->GetPriority(), priority);
}
Thread* Scheduler::GetNextSuggestedThread(u32 core, u32 maximum_priority) const {
std::lock_guard lock{scheduler_mutex};
const u32 mask = 1U << core;
for (auto* thread : ready_queue) {
if ((thread->GetAffinityMask() & mask) != 0 && thread->GetPriority() < maximum_priority) {
return thread;
}
}
return nullptr;
}
void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
ASSERT(thread != nullptr);
// Avoid yielding if the thread isn't even running.
ASSERT(thread->GetStatus() == ThreadStatus::Running);
// Sanity check that the priority is valid
ASSERT(thread->GetPriority() < THREADPRIO_COUNT);
// Yield this thread -- sleep for zero time and force reschedule to different thread
GetCurrentThread()->Sleep(0);
}
void Scheduler::YieldWithLoadBalancing(Thread* thread) {
ASSERT(thread != nullptr);
const auto priority = thread->GetPriority();
const auto core = static_cast<u32>(thread->GetProcessorID());
// Avoid yielding if the thread isn't even running.
ASSERT(thread->GetStatus() == ThreadStatus::Running);
// Sanity check that the priority is valid
ASSERT(priority < THREADPRIO_COUNT);
// Sleep for zero time to be able to force reschedule to different thread
GetCurrentThread()->Sleep(0);
Thread* suggested_thread = nullptr;
// Search through all of the cpu cores (except this one) for a suggested thread.
// Take the first non-nullptr one
for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) {
const auto res =
system.CpuCore(cur_core).Scheduler().GetNextSuggestedThread(core, priority);
// If scheduler provides a suggested thread
if (res != nullptr) {
// And its better than the current suggested thread (or is the first valid one)
if (suggested_thread == nullptr ||
suggested_thread->GetPriority() > res->GetPriority()) {
suggested_thread = res;
}
}
}
// If a suggested thread was found, queue that for this core
if (suggested_thread != nullptr)
suggested_thread->ChangeCore(core, suggested_thread->GetAffinityMask());
}
void Scheduler::YieldAndWaitForLoadBalancing(Thread* thread) {
UNIMPLEMENTED_MSG("Wait for load balancing thread yield type is not implemented!");
void Scheduler::Shutdown() {
current_thread = nullptr;
selected_thread = nullptr;
}
} // namespace Kernel

View File

@@ -20,124 +20,185 @@ namespace Kernel {
class Process;
class Scheduler final {
class GlobalScheduler final {
public:
explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core);
~Scheduler();
static constexpr u32 NUM_CPU_CORES = 4;
/// Returns whether there are any threads that are ready to run.
bool HaveReadyThreads() const;
/// Reschedules to the next available thread (call after current thread is suspended)
void Reschedule();
/// Gets the current running thread
Thread* GetCurrentThread() const;
/// Gets the timestamp for the last context switch in ticks.
u64 GetLastContextSwitchTicks() const;
explicit GlobalScheduler(Core::System& system);
~GlobalScheduler();
/// Adds a new thread to the scheduler
void AddThread(SharedPtr<Thread> thread);
/// Removes a thread from the scheduler
void RemoveThread(Thread* thread);
/// Schedules a thread that has become "ready"
void ScheduleThread(Thread* thread, u32 priority);
/// Unschedules a thread that was already scheduled
void UnscheduleThread(Thread* thread, u32 priority);
/// Sets the priority of a thread in the scheduler
void SetThreadPriority(Thread* thread, u32 priority);
/// Gets the next suggested thread for load balancing
Thread* GetNextSuggestedThread(u32 core, u32 minimum_priority) const;
/**
* YieldWithoutLoadBalancing -- analogous to normal yield on a system
* Moves the thread to the end of the ready queue for its priority, and then reschedules the
* system to the new head of the queue.
*
* Example (Single Core -- but can be extrapolated to multi):
* ready_queue[prio=0]: ThreadA, ThreadB, ThreadC (->exec order->)
* Currently Running: ThreadR
*
* ThreadR calls YieldWithoutLoadBalancing
*
* ThreadR is moved to the end of ready_queue[prio=0]:
* ready_queue[prio=0]: ThreadA, ThreadB, ThreadC, ThreadR (->exec order->)
* Currently Running: Nothing
*
* System is rescheduled (ThreadA is popped off of queue):
* ready_queue[prio=0]: ThreadB, ThreadC, ThreadR (->exec order->)
* Currently Running: ThreadA
*
* If the queue is empty at time of call, no yielding occurs. This does not cross between cores
* or priorities at all.
*/
void YieldWithoutLoadBalancing(Thread* thread);
/**
* YieldWithLoadBalancing -- yield but with better selection of the new running thread
* Moves the current thread to the end of the ready queue for its priority, then selects a
* 'suggested thread' (a thread on a different core that could run on this core) from the
* scheduler, changes its core, and reschedules the current core to that thread.
*
* Example (Dual Core -- can be extrapolated to Quad Core, this is just normal yield if it were
* single core):
* ready_queue[core=0][prio=0]: ThreadA, ThreadB (affinities not pictured as irrelevant
* ready_queue[core=1][prio=0]: ThreadC[affinity=both], ThreadD[affinity=core1only]
* Currently Running: ThreadQ on Core 0 || ThreadP on Core 1
*
* ThreadQ calls YieldWithLoadBalancing
*
* ThreadQ is moved to the end of ready_queue[core=0][prio=0]:
* ready_queue[core=0][prio=0]: ThreadA, ThreadB
* ready_queue[core=1][prio=0]: ThreadC[affinity=both], ThreadD[affinity=core1only]
* Currently Running: ThreadQ on Core 0 || ThreadP on Core 1
*
* A list of suggested threads for each core is compiled
* Suggested Threads: {ThreadC on Core 1}
* If this were quad core (as the switch is), there could be between 0 and 3 threads in this
* list. If there are more than one, the thread is selected by highest prio.
*
* ThreadC is core changed to Core 0:
* ready_queue[core=0][prio=0]: ThreadC, ThreadA, ThreadB, ThreadQ
* ready_queue[core=1][prio=0]: ThreadD
* Currently Running: None on Core 0 || ThreadP on Core 1
*
* System is rescheduled (ThreadC is popped off of queue):
* ready_queue[core=0][prio=0]: ThreadA, ThreadB, ThreadQ
* ready_queue[core=1][prio=0]: ThreadD
* Currently Running: ThreadC on Core 0 || ThreadP on Core 1
*
* If no suggested threads can be found this will behave just as normal yield. If there are
* multiple candidates for the suggested thread on a core, the highest prio is taken.
*/
void YieldWithLoadBalancing(Thread* thread);
/// Currently unknown -- asserts as unimplemented on call
void YieldAndWaitForLoadBalancing(Thread* thread);
void RemoveThread(const Thread* thread);
/// Returns a list of all threads managed by the scheduler
const std::vector<SharedPtr<Thread>>& GetThreadList() const {
return thread_list;
}
private:
/**
* Pops and returns the next thread from the thread queue
* @return A pointer to the next ready thread
* Add a thread to the suggested queue of a cpu core. Suggested threads may be
* picked if no thread is scheduled to run on the core.
*/
Thread* PopNextReadyThread();
void Suggest(u32 priority, u32 core, Thread* thread);
/**
* Switches the CPU's active thread context to that of the specified thread
* @param new_thread The thread to switch to
* Remove a thread to the suggested queue of a cpu core. Suggested threads may be
* picked if no thread is scheduled to run on the core.
*/
void SwitchContext(Thread* new_thread);
void Unsuggest(u32 priority, u32 core, Thread* thread);
/**
* Add a thread to the scheduling queue of a cpu core. The thread is added at the
* back the queue in its priority level.
*/
void Schedule(u32 priority, u32 core, Thread* thread);
/**
* Add a thread to the scheduling queue of a cpu core. The thread is added at the
* front the queue in its priority level.
*/
void SchedulePrepend(u32 priority, u32 core, Thread* thread);
/// Reschedule an already scheduled thread based on a new priority
void Reschedule(u32 priority, u32 core, Thread* thread);
/// Unschedules a thread.
void Unschedule(u32 priority, u32 core, Thread* thread);
/**
* Transfers a thread into an specific core. If the destination_core is -1
* it will be unscheduled from its source code and added into its suggested
* queue.
*/
void TransferToCore(u32 priority, s32 destination_core, Thread* thread);
/// Selects a core and forces it to unload its current thread's context
void UnloadThread(s32 core);
/**
* Takes care of selecting the new scheduled thread in three steps:
*
* 1. First a thread is selected from the top of the priority queue. If no thread
* is obtained then we move to step two, else we are done.
*
* 2. Second we try to get a suggested thread that's not assigned to any core or
* that is not the top thread in that core.
*
* 3. Third is no suggested thread is found, we do a second pass and pick a running
* thread in another core and swap it with its current thread.
*/
void SelectThread(u32 core);
bool HaveReadyThreads(u32 core_id) const {
return !scheduled_queue[core_id].empty();
}
/**
* Takes a thread and moves it to the back of the it's priority list.
*
* @note This operation can be redundant and no scheduling is changed if marked as so.
*/
bool YieldThread(Thread* thread);
/**
* Takes a thread and moves it to the back of the it's priority list.
* Afterwards, tries to pick a suggested thread from the suggested queue that has worse time or
* a better priority than the next thread in the core.
*
* @note This operation can be redundant and no scheduling is changed if marked as so.
*/
bool YieldThreadAndBalanceLoad(Thread* thread);
/**
* Takes a thread and moves it out of the scheduling queue.
* and into the suggested queue. If no thread can be scheduled afterwards in that core,
* a suggested thread is obtained instead.
*
* @note This operation can be redundant and no scheduling is changed if marked as so.
*/
bool YieldThreadAndWaitForLoadBalancing(Thread* thread);
/**
* Rotates the scheduling queues of threads at a preemption priority and then does
* some core rebalancing. Preemption priorities can be found in the array
* 'preemption_priorities'.
*
* @note This operation happens every 10ms.
*/
void PreemptThreads();
u32 CpuCoresCount() const {
return NUM_CPU_CORES;
}
void SetReselectionPending() {
is_reselection_pending.store(true, std::memory_order_release);
}
bool IsReselectionPending() const {
return is_reselection_pending.load(std::memory_order_acquire);
}
void Shutdown();
private:
bool AskForReselectionOrMarkRedundant(Thread* current_thread, const Thread* winner);
static constexpr u32 min_regular_priority = 2;
std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, NUM_CPU_CORES> scheduled_queue;
std::array<Common::MultiLevelQueue<Thread*, THREADPRIO_COUNT>, NUM_CPU_CORES> suggested_queue;
std::atomic<bool> is_reselection_pending{false};
// The priority levels at which the global scheduler preempts threads every 10 ms. They are
// ordered from Core 0 to Core 3.
std::array<u32, NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62};
/// Lists all thread ids that aren't deleted/etc.
std::vector<SharedPtr<Thread>> thread_list;
Core::System& system;
};
class Scheduler final {
public:
explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, u32 core_id);
~Scheduler();
/// Returns whether there are any threads that are ready to run.
bool HaveReadyThreads() const;
/// Reschedules to the next available thread (call after current thread is suspended)
void TryDoContextSwitch();
/// Unloads currently running thread
void UnloadThread();
/// Select the threads in top of the scheduling multilist.
void SelectThreads();
/// Gets the current running thread
Thread* GetCurrentThread() const;
/// Gets the currently selected thread from the top of the multilevel queue
Thread* GetSelectedThread() const;
/// Gets the timestamp for the last context switch in ticks.
u64 GetLastContextSwitchTicks() const;
bool ContextSwitchPending() const {
return is_context_switch_pending;
}
/// Shutdowns the scheduler.
void Shutdown();
private:
friend class GlobalScheduler;
/// Switches the CPU's active thread context to that of the specified thread
void SwitchContext();
/**
* Called on every context switch to update the internal timestamp
@@ -152,19 +213,16 @@ private:
*/
void UpdateLastContextSwitchTime(Thread* thread, Process* process);
/// Lists all thread ids that aren't deleted/etc.
std::vector<SharedPtr<Thread>> thread_list;
/// Lists only ready thread ids.
Common::MultiLevelQueue<Thread*, THREADPRIO_LOWEST + 1> ready_queue;
SharedPtr<Thread> current_thread = nullptr;
Core::ARM_Interface& cpu_core;
u64 last_context_switch_time = 0;
SharedPtr<Thread> selected_thread = nullptr;
Core::System& system;
static std::mutex scheduler_mutex;
Core::ARM_Interface& cpu_core;
u64 last_context_switch_time = 0;
u64 idle_selection_count = 0;
const u32 core_id;
bool is_context_switch_pending = false;
};
} // namespace Kernel

View File

@@ -516,7 +516,7 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr
thread->WakeAfterDelay(nano_seconds);
thread->SetWakeupCallback(DefaultThreadWakeupCallback);
system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
system.PrepareReschedule(thread->GetProcessorID());
return RESULT_TIMEOUT;
}
@@ -534,6 +534,7 @@ static ResultCode CancelSynchronization(Core::System& system, Handle thread_hand
}
thread->CancelWait();
system.PrepareReschedule(thread->GetProcessorID());
return RESULT_SUCCESS;
}
@@ -577,7 +578,8 @@ static ResultCode ArbitrateUnlock(Core::System& system, VAddr mutex_addr) {
}
auto* const current_process = system.Kernel().CurrentProcess();
return current_process->GetMutex().Release(mutex_addr);
return current_process->GetMutex().Release(mutex_addr,
system.CurrentScheduler().GetCurrentThread());
}
enum class BreakType : u32 {
@@ -1066,6 +1068,8 @@ static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 act
}
thread->SetActivity(static_cast<ThreadActivity>(activity));
system.PrepareReschedule(thread->GetProcessorID());
return RESULT_SUCCESS;
}
@@ -1147,7 +1151,7 @@ static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 pri
thread->SetPriority(priority);
system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
system.PrepareReschedule(thread->GetProcessorID());
return RESULT_SUCCESS;
}
@@ -1503,7 +1507,7 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
thread->SetName(
fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle));
system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
system.PrepareReschedule(thread->GetProcessorID());
return RESULT_SUCCESS;
}
@@ -1525,7 +1529,7 @@ static ResultCode StartThread(Core::System& system, Handle thread_handle) {
thread->ResumeFromWait();
if (thread->GetStatus() == ThreadStatus::Ready) {
system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
system.PrepareReschedule(thread->GetProcessorID());
}
return RESULT_SUCCESS;
@@ -1537,7 +1541,7 @@ static void ExitThread(Core::System& system) {
auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
current_thread->Stop();
system.CurrentScheduler().RemoveThread(current_thread);
system.GlobalScheduler().RemoveThread(current_thread);
system.PrepareReschedule();
}
@@ -1553,17 +1557,18 @@ static void SleepThread(Core::System& system, s64 nanoseconds) {
auto& scheduler = system.CurrentScheduler();
auto* const current_thread = scheduler.GetCurrentThread();
bool is_redundant = false;
if (nanoseconds <= 0) {
switch (static_cast<SleepType>(nanoseconds)) {
case SleepType::YieldWithoutLoadBalancing:
scheduler.YieldWithoutLoadBalancing(current_thread);
is_redundant = current_thread->YieldSimple();
break;
case SleepType::YieldWithLoadBalancing:
scheduler.YieldWithLoadBalancing(current_thread);
is_redundant = current_thread->YieldAndBalanceLoad();
break;
case SleepType::YieldAndWaitForLoadBalancing:
scheduler.YieldAndWaitForLoadBalancing(current_thread);
is_redundant = current_thread->YieldAndWaitForLoadBalancing();
break;
default:
UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds);
@@ -1572,10 +1577,13 @@ static void SleepThread(Core::System& system, s64 nanoseconds) {
current_thread->Sleep(nanoseconds);
}
// Reschedule all CPU cores
for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i) {
system.CpuCore(i).PrepareReschedule();
if (is_redundant) {
// If it's redundant, the core is pretty much idle. Some games keep idling
// a core while it's doing nothing, we advance timing to avoid costly continuous
// calls.
system.CoreTiming().AddTicks(2000);
}
system.PrepareReschedule(current_thread->GetProcessorID());
}
/// Wait process wide key atomic
@@ -1601,17 +1609,21 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add
return ERR_INVALID_ADDRESS;
}
ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4));
auto* const current_process = system.Kernel().CurrentProcess();
const auto& handle_table = current_process->GetHandleTable();
SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
ASSERT(thread);
const auto release_result = current_process->GetMutex().Release(mutex_addr);
SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread();
const auto release_result =
current_process->GetMutex().Release(mutex_addr, current_thread.get());
if (release_result.IsError()) {
return release_result;
}
SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread();
current_thread->SetCondVarWaitAddress(condition_variable_addr);
current_thread->SetMutexWaitAddress(mutex_addr);
current_thread->SetWaitHandle(thread_handle);
@@ -1622,7 +1634,7 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add
// Note: Deliberately don't attempt to inherit the lock owner's priority.
system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
system.PrepareReschedule(current_thread->GetProcessorID());
return RESULT_SUCCESS;
}
@@ -1632,24 +1644,19 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}",
condition_variable_addr, target);
const auto RetrieveWaitingThreads = [&system](std::size_t core_index,
std::vector<SharedPtr<Thread>>& waiting_threads,
VAddr condvar_addr) {
const auto& scheduler = system.Scheduler(core_index);
const auto& thread_list = scheduler.GetThreadList();
for (const auto& thread : thread_list) {
if (thread->GetCondVarWaitAddress() == condvar_addr)
waiting_threads.push_back(thread);
}
};
ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4));
// Retrieve a list of all threads that are waiting for this condition variable.
std::vector<SharedPtr<Thread>> waiting_threads;
RetrieveWaitingThreads(0, waiting_threads, condition_variable_addr);
RetrieveWaitingThreads(1, waiting_threads, condition_variable_addr);
RetrieveWaitingThreads(2, waiting_threads, condition_variable_addr);
RetrieveWaitingThreads(3, waiting_threads, condition_variable_addr);
const auto& scheduler = system.GlobalScheduler();
const auto& thread_list = scheduler.GetThreadList();
for (const auto& thread : thread_list) {
if (thread->GetCondVarWaitAddress() == condition_variable_addr) {
waiting_threads.push_back(thread);
}
}
// Sort them by priority, such that the highest priority ones come first.
std::sort(waiting_threads.begin(), waiting_threads.end(),
[](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
@@ -1679,18 +1686,20 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
// Atomically read the value of the mutex.
u32 mutex_val = 0;
u32 update_val = 0;
const VAddr mutex_address = thread->GetMutexWaitAddress();
do {
monitor.SetExclusive(current_core, thread->GetMutexWaitAddress());
monitor.SetExclusive(current_core, mutex_address);
// If the mutex is not yet acquired, acquire it.
mutex_val = Memory::Read32(thread->GetMutexWaitAddress());
mutex_val = Memory::Read32(mutex_address);
if (mutex_val != 0) {
monitor.ClearExclusive();
break;
update_val = mutex_val | Mutex::MutexHasWaitersFlag;
} else {
update_val = thread->GetWaitHandle();
}
} while (!monitor.ExclusiveWrite32(current_core, thread->GetMutexWaitAddress(),
thread->GetWaitHandle()));
} while (!monitor.ExclusiveWrite32(current_core, mutex_address, update_val));
if (mutex_val == 0) {
// We were able to acquire the mutex, resume this thread.
ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar);
@@ -1704,20 +1713,9 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
thread->SetLockOwner(nullptr);
thread->SetMutexWaitAddress(0);
thread->SetWaitHandle(0);
system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
thread->SetWaitSynchronizationResult(RESULT_SUCCESS);
system.PrepareReschedule(thread->GetProcessorID());
} else {
// Atomically signal that the mutex now has a waiting thread.
do {
monitor.SetExclusive(current_core, thread->GetMutexWaitAddress());
// Ensure that the mutex value is still what we expect.
u32 value = Memory::Read32(thread->GetMutexWaitAddress());
// TODO(Subv): When this happens, the kernel just clears the exclusive state and
// retries the initial read for this thread.
ASSERT_MSG(mutex_val == value, "Unhandled synchronization primitive case");
} while (!monitor.ExclusiveWrite32(current_core, thread->GetMutexWaitAddress(),
mutex_val | Mutex::MutexHasWaitersFlag));
// The mutex is already owned by some other thread, make this thread wait on it.
const Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask);
const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
@@ -1728,6 +1726,7 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
thread->SetStatus(ThreadStatus::WaitMutex);
owner->AddMutexWaiter(thread);
system.PrepareReschedule(thread->GetProcessorID());
}
}
@@ -1754,7 +1753,12 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type,
const auto arbitration_type = static_cast<AddressArbiter::ArbitrationType>(type);
auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter();
return address_arbiter.WaitForAddress(address, arbitration_type, value, timeout);
const ResultCode result =
address_arbiter.WaitForAddress(address, arbitration_type, value, timeout);
if (result == RESULT_SUCCESS) {
system.PrepareReschedule();
}
return result;
}
// Signals to an address (via Address Arbiter)
@@ -2040,7 +2044,10 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle,
return ERR_INVALID_HANDLE;
}
system.PrepareReschedule(thread->GetProcessorID());
thread->ChangeCore(core, affinity_mask);
system.PrepareReschedule(thread->GetProcessorID());
return RESULT_SUCCESS;
}
@@ -2095,7 +2102,7 @@ static ResultCode CreateEvent(Core::System& system, Handle* write_handle, Handle
auto& kernel = system.Kernel();
const auto [readable_event, writable_event] =
WritableEvent::CreateEventPair(kernel, ResetType::Manual, "CreateEvent");
WritableEvent::CreateEventPair(kernel, "CreateEvent");
HandleTable& handle_table = kernel.CurrentProcess()->GetHandleTable();
@@ -2151,6 +2158,7 @@ static ResultCode SignalEvent(Core::System& system, Handle handle) {
}
writable_event->Signal();
system.PrepareReschedule();
return RESULT_SUCCESS;
}

View File

@@ -45,15 +45,7 @@ void Thread::Stop() {
callback_handle);
kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle);
callback_handle = 0;
// Clean up thread from ready queue
// This is only needed when the thread is terminated forcefully (SVC TerminateProcess)
if (status == ThreadStatus::Ready || status == ThreadStatus::Paused) {
scheduler->UnscheduleThread(this, current_priority);
}
status = ThreadStatus::Dead;
SetStatus(ThreadStatus::Dead);
WakeupAllWaitingThreads();
// Clean up any dangling references in objects that this thread was waiting for
@@ -132,17 +124,16 @@ void Thread::ResumeFromWait() {
wakeup_callback = nullptr;
if (activity == ThreadActivity::Paused) {
status = ThreadStatus::Paused;
SetStatus(ThreadStatus::Paused);
return;
}
status = ThreadStatus::Ready;
ChangeScheduler();
SetStatus(ThreadStatus::Ready);
}
void Thread::CancelWait() {
ASSERT(GetStatus() == ThreadStatus::WaitSynch);
ClearWaitObjects();
SetWaitSynchronizationResult(ERR_SYNCHRONIZATION_CANCELED);
ResumeFromWait();
}
@@ -205,9 +196,9 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
thread->name = std::move(name);
thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap();
thread->owner_process = &owner_process;
auto& scheduler = kernel.GlobalScheduler();
scheduler.AddThread(thread);
thread->tls_address = thread->owner_process->CreateTLSRegion();
thread->scheduler = &system.Scheduler(processor_id);
thread->scheduler->AddThread(thread);
thread->owner_process->RegisterThread(thread.get());
@@ -250,6 +241,22 @@ void Thread::SetStatus(ThreadStatus new_status) {
return;
}
switch (new_status) {
case ThreadStatus::Ready:
case ThreadStatus::Running:
SetSchedulingStatus(ThreadSchedStatus::Runnable);
break;
case ThreadStatus::Dormant:
SetSchedulingStatus(ThreadSchedStatus::None);
break;
case ThreadStatus::Dead:
SetSchedulingStatus(ThreadSchedStatus::Exited);
break;
default:
SetSchedulingStatus(ThreadSchedStatus::Paused);
break;
}
if (status == ThreadStatus::Running) {
last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
}
@@ -311,8 +318,7 @@ void Thread::UpdatePriority() {
return;
}
scheduler->SetThreadPriority(this, new_priority);
current_priority = new_priority;
SetCurrentPriority(new_priority);
if (!lock_owner) {
return;
@@ -328,47 +334,7 @@ void Thread::UpdatePriority() {
}
void Thread::ChangeCore(u32 core, u64 mask) {
ideal_core = core;
affinity_mask = mask;
ChangeScheduler();
}
void Thread::ChangeScheduler() {
if (status != ThreadStatus::Ready) {
return;
}
auto& system = Core::System::GetInstance();
std::optional<s32> new_processor_id{GetNextProcessorId(affinity_mask)};
if (!new_processor_id) {
new_processor_id = processor_id;
}
if (ideal_core != -1 && system.Scheduler(ideal_core).GetCurrentThread() == nullptr) {
new_processor_id = ideal_core;
}
ASSERT(*new_processor_id < 4);
// Add thread to new core's scheduler
auto& next_scheduler = system.Scheduler(*new_processor_id);
if (*new_processor_id != processor_id) {
// Remove thread from previous core's scheduler
scheduler->RemoveThread(this);
next_scheduler.AddThread(this);
}
processor_id = *new_processor_id;
// If the thread was ready, unschedule from the previous core and schedule on the new core
scheduler->UnscheduleThread(this, current_priority);
next_scheduler.ScheduleThread(this, current_priority);
// Change thread's scheduler
scheduler = &next_scheduler;
system.CpuCore(processor_id).PrepareReschedule();
SetCoreAndAffinityMask(core, mask);
}
bool Thread::AllWaitObjectsReady() const {
@@ -388,10 +354,8 @@ void Thread::SetActivity(ThreadActivity value) {
if (value == ThreadActivity::Paused) {
// Set status if not waiting
if (status == ThreadStatus::Ready) {
status = ThreadStatus::Paused;
} else if (status == ThreadStatus::Running) {
status = ThreadStatus::Paused;
if (status == ThreadStatus::Ready || status == ThreadStatus::Running) {
SetStatus(ThreadStatus::Paused);
Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule();
}
} else if (status == ThreadStatus::Paused) {
@@ -408,6 +372,170 @@ void Thread::Sleep(s64 nanoseconds) {
WakeAfterDelay(nanoseconds);
}
bool Thread::YieldSimple() {
auto& scheduler = kernel.GlobalScheduler();
return scheduler.YieldThread(this);
}
bool Thread::YieldAndBalanceLoad() {
auto& scheduler = kernel.GlobalScheduler();
return scheduler.YieldThreadAndBalanceLoad(this);
}
bool Thread::YieldAndWaitForLoadBalancing() {
auto& scheduler = kernel.GlobalScheduler();
return scheduler.YieldThreadAndWaitForLoadBalancing(this);
}
void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) {
const u32 old_flags = scheduling_state;
scheduling_state = (scheduling_state & static_cast<u32>(ThreadSchedMasks::HighMask)) |
static_cast<u32>(new_status);
AdjustSchedulingOnStatus(old_flags);
}
void Thread::SetCurrentPriority(u32 new_priority) {
const u32 old_priority = std::exchange(current_priority, new_priority);
AdjustSchedulingOnPriority(old_priority);
}
ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) {
const auto HighestSetCore = [](u64 mask, u32 max_cores) {
for (s32 core = max_cores - 1; core >= 0; core--) {
if (((mask >> core) & 1) != 0) {
return core;
}
}
return -1;
};
const bool use_override = affinity_override_count != 0;
if (new_core == THREADPROCESSORID_DONT_UPDATE) {
new_core = use_override ? ideal_core_override : ideal_core;
if ((new_affinity_mask & (1ULL << new_core)) == 0) {
return ERR_INVALID_COMBINATION;
}
}
if (use_override) {
ideal_core_override = new_core;
affinity_mask_override = new_affinity_mask;
} else {
const u64 old_affinity_mask = std::exchange(affinity_mask, new_affinity_mask);
ideal_core = new_core;
if (old_affinity_mask != new_affinity_mask) {
const s32 old_core = processor_id;
if (processor_id >= 0 && ((affinity_mask >> processor_id) & 1) == 0) {
if (ideal_core < 0) {
processor_id = HighestSetCore(affinity_mask, GlobalScheduler::NUM_CPU_CORES);
} else {
processor_id = ideal_core;
}
}
AdjustSchedulingOnAffinity(old_affinity_mask, old_core);
}
}
return RESULT_SUCCESS;
}
void Thread::AdjustSchedulingOnStatus(u32 old_flags) {
if (old_flags == scheduling_state) {
return;
}
auto& scheduler = kernel.GlobalScheduler();
if (static_cast<ThreadSchedStatus>(old_flags & static_cast<u32>(ThreadSchedMasks::LowMask)) ==
ThreadSchedStatus::Runnable) {
// In this case the thread was running, now it's pausing/exitting
if (processor_id >= 0) {
scheduler.Unschedule(current_priority, processor_id, this);
}
for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
if (core != processor_id && ((affinity_mask >> core) & 1) != 0) {
scheduler.Unsuggest(current_priority, static_cast<u32>(core), this);
}
}
} else if (GetSchedulingStatus() == ThreadSchedStatus::Runnable) {
// The thread is now set to running from being stopped
if (processor_id >= 0) {
scheduler.Schedule(current_priority, processor_id, this);
}
for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
if (core != processor_id && ((affinity_mask >> core) & 1) != 0) {
scheduler.Suggest(current_priority, static_cast<u32>(core), this);
}
}
}
scheduler.SetReselectionPending();
}
void Thread::AdjustSchedulingOnPriority(u32 old_priority) {
if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) {
return;
}
auto& scheduler = Core::System::GetInstance().GlobalScheduler();
if (processor_id >= 0) {
scheduler.Unschedule(old_priority, processor_id, this);
}
for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
if (core != processor_id && ((affinity_mask >> core) & 1) != 0) {
scheduler.Unsuggest(old_priority, core, this);
}
}
// Add thread to the new priority queues.
Thread* current_thread = GetCurrentThread();
if (processor_id >= 0) {
if (current_thread == this) {
scheduler.SchedulePrepend(current_priority, processor_id, this);
} else {
scheduler.Schedule(current_priority, processor_id, this);
}
}
for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
if (core != processor_id && ((affinity_mask >> core) & 1) != 0) {
scheduler.Suggest(current_priority, core, this);
}
}
scheduler.SetReselectionPending();
}
void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) {
auto& scheduler = Core::System::GetInstance().GlobalScheduler();
if (GetSchedulingStatus() != ThreadSchedStatus::Runnable ||
current_priority >= THREADPRIO_COUNT) {
return;
}
for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
if (((old_affinity_mask >> core) & 1) != 0) {
if (core == old_core) {
scheduler.Unschedule(current_priority, core, this);
} else {
scheduler.Unsuggest(current_priority, core, this);
}
}
}
for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) {
if (((affinity_mask >> core) & 1) != 0) {
if (core == processor_id) {
scheduler.Schedule(current_priority, core, this);
} else {
scheduler.Suggest(current_priority, core, this);
}
}
}
scheduler.SetReselectionPending();
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/**

View File

@@ -75,6 +75,26 @@ enum class ThreadActivity : u32 {
Paused = 1,
};
enum class ThreadSchedStatus : u32 {
None = 0,
Paused = 1,
Runnable = 2,
Exited = 3,
};
enum class ThreadSchedFlags : u32 {
ProcessPauseFlag = 1 << 4,
ThreadPauseFlag = 1 << 5,
ProcessDebugPauseFlag = 1 << 6,
KernelInitPauseFlag = 1 << 8,
};
enum class ThreadSchedMasks : u32 {
LowMask = 0x000f,
HighMask = 0xfff0,
ForcePauseMask = 0x0070,
};
class Thread final : public WaitObject {
public:
using MutexWaitingThreads = std::vector<SharedPtr<Thread>>;
@@ -278,6 +298,10 @@ public:
return processor_id;
}
void SetProcessorID(s32 new_core) {
processor_id = new_core;
}
Process* GetOwnerProcess() {
return owner_process;
}
@@ -295,6 +319,9 @@ public:
}
void ClearWaitObjects() {
for (const auto& waiting_object : wait_objects) {
waiting_object->RemoveWaitingThread(this);
}
wait_objects.clear();
}
@@ -383,11 +410,47 @@ public:
/// Sleeps this thread for the given amount of nanoseconds.
void Sleep(s64 nanoseconds);
/// Yields this thread without rebalancing loads.
bool YieldSimple();
/// Yields this thread and does a load rebalancing.
bool YieldAndBalanceLoad();
/// Yields this thread and if the core is left idle, loads are rebalanced
bool YieldAndWaitForLoadBalancing();
void IncrementYieldCount() {
yield_count++;
}
u64 GetYieldCount() const {
return yield_count;
}
ThreadSchedStatus GetSchedulingStatus() const {
return static_cast<ThreadSchedStatus>(scheduling_state &
static_cast<u32>(ThreadSchedMasks::LowMask));
}
bool IsRunning() const {
return is_running;
}
void SetIsRunning(bool value) {
is_running = value;
}
private:
explicit Thread(KernelCore& kernel);
~Thread() override;
void ChangeScheduler();
void SetSchedulingStatus(ThreadSchedStatus new_status);
void SetCurrentPriority(u32 new_priority);
ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask);
void AdjustSchedulingOnStatus(u32 old_flags);
void AdjustSchedulingOnPriority(u32 old_priority);
void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core);
Core::ARM_Interface::ThreadContext context{};
@@ -409,6 +472,8 @@ private:
u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks.
u64 last_running_ticks = 0; ///< CPU tick when thread was last running
u64 yield_count = 0; ///< Number of redundant yields carried by this thread.
///< a redundant yield is one where no scheduling is changed
s32 processor_id = 0;
@@ -453,6 +518,13 @@ private:
ThreadActivity activity = ThreadActivity::Normal;
s32 ideal_core_override = -1;
u64 affinity_mask_override = 0x1;
u32 affinity_override_count = 0;
u32 scheduling_state = 0;
bool is_running = false;
std::string name;
};

View File

@@ -6,6 +6,9 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/thread.h"
@@ -82,9 +85,6 @@ void WaitObject::WakeupWaitingThread(SharedPtr<Thread> thread) {
const std::size_t index = thread->GetWaitObjectIndex(this);
for (const auto& object : thread->GetWaitObjects()) {
object->RemoveWaitingThread(thread.get());
}
thread->ClearWaitObjects();
thread->CancelWakeupTimer();
@@ -95,6 +95,7 @@ void WaitObject::WakeupWaitingThread(SharedPtr<Thread> thread) {
}
if (resume) {
thread->ResumeFromWait();
Core::System::GetInstance().PrepareReschedule(thread->GetProcessorID());
}
}

View File

@@ -15,8 +15,7 @@ namespace Kernel {
WritableEvent::WritableEvent(KernelCore& kernel) : Object{kernel} {}
WritableEvent::~WritableEvent() = default;
EventPair WritableEvent::CreateEventPair(KernelCore& kernel, ResetType reset_type,
std::string name) {
EventPair WritableEvent::CreateEventPair(KernelCore& kernel, std::string name) {
SharedPtr<WritableEvent> writable_event(new WritableEvent(kernel));
SharedPtr<ReadableEvent> readable_event(new ReadableEvent(kernel));
@@ -24,7 +23,6 @@ EventPair WritableEvent::CreateEventPair(KernelCore& kernel, ResetType reset_typ
writable_event->readable = readable_event;
readable_event->name = name + ":Readable";
readable_event->signaled = false;
readable_event->reset_type = reset_type;
return {std::move(readable_event), std::move(writable_event)};
}
@@ -33,10 +31,6 @@ SharedPtr<ReadableEvent> WritableEvent::GetReadableEvent() const {
return readable;
}
ResetType WritableEvent::GetResetType() const {
return readable->reset_type;
}
void WritableEvent::Signal() {
readable->Signal();
}

View File

@@ -24,11 +24,9 @@ public:
/**
* Creates an event
* @param kernel The kernel instance to create this event under.
* @param reset_type ResetType describing how to create event
* @param name Optional name of event
*/
static EventPair CreateEventPair(KernelCore& kernel, ResetType reset_type,
std::string name = "Unknown");
static EventPair CreateEventPair(KernelCore& kernel, std::string name = "Unknown");
std::string GetTypeName() const override {
return "WritableEvent";
@@ -44,8 +42,6 @@ public:
SharedPtr<ReadableEvent> GetReadableEvent() const;
ResetType GetResetType() const;
void Signal();
void Clear();
bool IsSignaled() const;

View File

@@ -289,8 +289,8 @@ ISelfController::ISelfController(Core::System& system,
RegisterHandlers(functions);
auto& kernel = system.Kernel();
launchable_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
"ISelfController:LaunchableEvent");
launchable_event =
Kernel::WritableEvent::CreateEventPair(kernel, "ISelfController:LaunchableEvent");
// This event is created by AM on the first time GetAccumulatedSuspendedTickChangedEvent() is
// called. Yuzu can just create it unconditionally, since it doesn't need to support multiple
@@ -298,7 +298,7 @@ ISelfController::ISelfController(Core::System& system,
// suspended if the event has previously been created by a call to
// GetAccumulatedSuspendedTickChangedEvent.
accumulated_suspended_tick_changed_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Manual, "ISelfController:AccumulatedSuspendedTickChangedEvent");
kernel, "ISelfController:AccumulatedSuspendedTickChangedEvent");
accumulated_suspended_tick_changed_event.writable->Signal();
}
@@ -523,10 +523,10 @@ void ISelfController::GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequest
}
AppletMessageQueue::AppletMessageQueue(Kernel::KernelCore& kernel) {
on_new_message = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
"AMMessageQueue:OnMessageRecieved");
on_operation_mode_changed = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "AMMessageQueue:OperationModeChanged");
on_new_message =
Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OnMessageRecieved");
on_operation_mode_changed =
Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OperationModeChanged");
}
AppletMessageQueue::~AppletMessageQueue() = default;
@@ -1091,7 +1091,7 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)
auto& kernel = system.Kernel();
gpu_error_detected_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Manual, "IApplicationFunctions:GpuErrorDetectedSystemEvent");
kernel, "IApplicationFunctions:GpuErrorDetectedSystemEvent");
}
IApplicationFunctions::~IApplicationFunctions() = default;

View File

@@ -24,12 +24,12 @@
namespace Service::AM::Applets {
AppletDataBroker::AppletDataBroker(Kernel::KernelCore& kernel) {
state_changed_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:StateChangedEvent");
pop_out_data_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:PopDataOutEvent");
state_changed_event =
Kernel::WritableEvent::CreateEventPair(kernel, "ILibraryAppletAccessor:StateChangedEvent");
pop_out_data_event =
Kernel::WritableEvent::CreateEventPair(kernel, "ILibraryAppletAccessor:PopDataOutEvent");
pop_interactive_out_data_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:PopInteractiveDataOutEvent");
kernel, "ILibraryAppletAccessor:PopInteractiveDataOutEvent");
}
AppletDataBroker::~AppletDataBroker() = default;

View File

@@ -67,8 +67,8 @@ AOC_U::AOC_U(Core::System& system)
RegisterHandlers(functions);
auto& kernel = system.Kernel();
aoc_change_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
"GetAddOnContentListChanged:Event");
aoc_change_event =
Kernel::WritableEvent::CreateEventPair(kernel, "GetAddOnContentListChanged:Event");
}
AOC_U::~AOC_U() = default;

View File

@@ -65,8 +65,8 @@ public:
RegisterHandlers(functions);
// This is the event handle used to check if the audio buffer was released
buffer_event = Kernel::WritableEvent::CreateEventPair(
system.Kernel(), Kernel::ResetType::Manual, "IAudioOutBufferReleased");
buffer_event =
Kernel::WritableEvent::CreateEventPair(system.Kernel(), "IAudioOutBufferReleased");
stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate,
audio_params.channel_count, std::move(unique_name),

View File

@@ -47,8 +47,8 @@ public:
// clang-format on
RegisterHandlers(functions);
system_event = Kernel::WritableEvent::CreateEventPair(
system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent");
system_event =
Kernel::WritableEvent::CreateEventPair(system.Kernel(), "IAudioRenderer:SystemEvent");
renderer = std::make_unique<AudioCore::AudioRenderer>(
system.CoreTiming(), audren_params, system_event.writable, instance_number);
}
@@ -180,17 +180,17 @@ public:
RegisterHandlers(functions);
auto& kernel = system.Kernel();
buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
"IAudioOutBufferReleasedEvent");
buffer_event =
Kernel::WritableEvent::CreateEventPair(kernel, "IAudioOutBufferReleasedEvent");
// Should be similar to audio_output_device_switch_event
audio_input_device_switch_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "IAudioDevice:AudioInputDeviceSwitchedEvent");
kernel, "IAudioDevice:AudioInputDeviceSwitchedEvent");
// Should only be signalled when an audio output device has been changed, example: speaker
// to headset
audio_output_device_switch_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "IAudioDevice:AudioOutputDeviceSwitchedEvent");
kernel, "IAudioDevice:AudioOutputDeviceSwitchedEvent");
}
private:

View File

@@ -13,8 +13,7 @@ namespace Service::BCAT {
ProgressServiceBackend::ProgressServiceBackend(Kernel::KernelCore& kernel,
std::string_view event_name) {
event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic,
std::string("ProgressServiceBackend:UpdateEvent:").append(event_name));
kernel, std::string("ProgressServiceBackend:UpdateEvent:").append(event_name));
}
Kernel::SharedPtr<Kernel::ReadableEvent> ProgressServiceBackend::GetEvent() const {

View File

@@ -34,8 +34,7 @@ public:
RegisterHandlers(functions);
auto& kernel = system.Kernel();
register_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "BT:RegisterEvent");
register_event = Kernel::WritableEvent::CreateEventPair(kernel, "BT:RegisterEvent");
}
private:

View File

@@ -57,14 +57,12 @@ public:
RegisterHandlers(functions);
auto& kernel = system.Kernel();
scan_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
"IBtmUserCore:ScanEvent");
connection_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "IBtmUserCore:ConnectionEvent");
service_discovery = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "IBtmUserCore:Discovery");
config_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
"IBtmUserCore:ConfigEvent");
scan_event = Kernel::WritableEvent::CreateEventPair(kernel, "IBtmUserCore:ScanEvent");
connection_event =
Kernel::WritableEvent::CreateEventPair(kernel, "IBtmUserCore:ConnectionEvent");
service_discovery =
Kernel::WritableEvent::CreateEventPair(kernel, "IBtmUserCore:Discovery");
config_event = Kernel::WritableEvent::CreateEventPair(kernel, "IBtmUserCore:ConfigEvent");
}
private:

View File

@@ -40,7 +40,10 @@ static FileSys::VirtualDir GetDirectoryRelativeWrapped(FileSys::VirtualDir base,
if (dir_name.empty() || dir_name == "." || dir_name == "/" || dir_name == "\\")
return base;
return base->GetDirectoryRelative(dir_name);
const auto res = base->GetDirectoryRelative(dir_name);
if (res == nullptr)
return base->CreateDirectoryRelative(dir_name);
return res;
}
VfsDirectoryServiceWrapper::VfsDirectoryServiceWrapper(FileSys::VirtualDir backing_)

View File

@@ -162,7 +162,7 @@ public:
RegisterHandlers(functions);
notification_event = Kernel::WritableEvent::CreateEventPair(
system.Kernel(), Kernel::ResetType::Manual, "INotificationService:NotifyEvent");
system.Kernel(), "INotificationService:NotifyEvent");
}
private:

View File

@@ -174,7 +174,7 @@ void Controller_NPad::OnInit() {
auto& kernel = system.Kernel();
for (std::size_t i = 0; i < styleset_changed_events.size(); i++) {
styleset_changed_events[i] = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Manual, fmt::format("npad:NpadStyleSetChanged_{}", i));
kernel, fmt::format("npad:NpadStyleSetChanged_{}", i));
}
if (!IsControllerActivated()) {
@@ -583,36 +583,6 @@ bool Controller_NPad::SwapNpadAssignment(u32 npad_id_1, u32 npad_id_2) {
return true;
}
bool Controller_NPad::IsControllerSupported(NPadControllerType controller) {
if (controller == NPadControllerType::Handheld) {
// Handheld is not even a supported type, lets stop here
if (std::find(supported_npad_id_types.begin(), supported_npad_id_types.end(),
NPAD_HANDHELD) == supported_npad_id_types.end()) {
return false;
}
// Handheld should not be supported in docked mode
if (Settings::values.use_docked_mode) {
return false;
}
}
switch (controller) {
case NPadControllerType::ProController:
return style.pro_controller;
case NPadControllerType::Handheld:
return style.handheld;
case NPadControllerType::JoyDual:
return style.joycon_dual;
case NPadControllerType::JoyLeft:
return style.joycon_left;
case NPadControllerType::JoyRight:
return style.joycon_right;
case NPadControllerType::Pokeball:
return style.pokeball;
default:
return false;
}
}
Controller_NPad::LedPattern Controller_NPad::GetLedPattern(u32 npad_id) {
if (npad_id == npad_id_list.back() || npad_id == npad_id_list[npad_id_list.size() - 2]) {
// These are controllers without led patterns
@@ -659,25 +629,24 @@ void Controller_NPad::ClearAllConnectedControllers() {
}
void Controller_NPad::DisconnectAllConnectedControllers() {
std::for_each(connected_controllers.begin(), connected_controllers.end(),
[](ControllerHolder& controller) { controller.is_connected = false; });
for (ControllerHolder& controller : connected_controllers) {
controller.is_connected = false;
}
}
void Controller_NPad::ConnectAllDisconnectedControllers() {
std::for_each(connected_controllers.begin(), connected_controllers.end(),
[](ControllerHolder& controller) {
if (controller.type != NPadControllerType::None && !controller.is_connected) {
controller.is_connected = false;
}
});
for (ControllerHolder& controller : connected_controllers) {
if (controller.type != NPadControllerType::None && !controller.is_connected) {
controller.is_connected = true;
}
}
}
void Controller_NPad::ClearAllControllers() {
std::for_each(connected_controllers.begin(), connected_controllers.end(),
[](ControllerHolder& controller) {
controller.type = NPadControllerType::None;
controller.is_connected = false;
});
for (ControllerHolder& controller : connected_controllers) {
controller.type = NPadControllerType::None;
controller.is_connected = false;
}
}
u32 Controller_NPad::GetAndResetPressState() {
@@ -685,10 +654,10 @@ u32 Controller_NPad::GetAndResetPressState() {
}
bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const {
const bool support_handheld =
std::find(supported_npad_id_types.begin(), supported_npad_id_types.end(), NPAD_HANDHELD) !=
supported_npad_id_types.end();
if (controller == NPadControllerType::Handheld) {
const bool support_handheld =
std::find(supported_npad_id_types.begin(), supported_npad_id_types.end(),
NPAD_HANDHELD) != supported_npad_id_types.end();
// Handheld is not even a supported type, lets stop here
if (!support_handheld) {
return false;
@@ -700,6 +669,7 @@ bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const
return true;
}
if (std::any_of(supported_npad_id_types.begin(), supported_npad_id_types.end(),
[](u32 npad_id) { return npad_id <= MAX_NPAD_ID; })) {
switch (controller) {
@@ -717,6 +687,7 @@ bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const
return false;
}
}
return false;
}
@@ -795,6 +766,7 @@ Controller_NPad::NPadControllerType Controller_NPad::DecideBestController(
priority_list.push_back(NPadControllerType::JoyLeft);
priority_list.push_back(NPadControllerType::JoyRight);
priority_list.push_back(NPadControllerType::JoyDual);
break;
}
const auto iter = std::find_if(priority_list.begin(), priority_list.end(),

View File

@@ -301,6 +301,11 @@ private:
bool is_connected;
};
void InitNewlyAddedControler(std::size_t controller_idx);
bool IsControllerSupported(NPadControllerType controller) const;
NPadControllerType DecideBestController(NPadControllerType priority) const;
void RequestPadStateUpdate(u32 npad_id);
u32 press_state{};
NPadType style{};
@@ -321,12 +326,7 @@ private:
std::array<ControllerHolder, 10> connected_controllers{};
bool can_controllers_vibrate{true};
void InitNewlyAddedControler(std::size_t controller_idx);
bool IsControllerSupported(NPadControllerType controller) const;
NPadControllerType DecideBestController(NPadControllerType priority) const;
void RequestPadStateUpdate(u32 npad_id);
std::array<ControllerPad, 10> npad_pad_states{};
bool IsControllerSupported(NPadControllerType controller);
bool is_in_lr_assignment_mode{false};
Core::System& system;
};

View File

@@ -203,13 +203,13 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
{120, &Hid::SetNpadJoyHoldType, "SetNpadJoyHoldType"},
{121, &Hid::GetNpadJoyHoldType, "GetNpadJoyHoldType"},
{122, &Hid::SetNpadJoyAssignmentModeSingleByDefault, "SetNpadJoyAssignmentModeSingleByDefault"},
{123, nullptr, "SetNpadJoyAssignmentModeSingleByDefault"},
{123, &Hid::SetNpadJoyAssignmentModeSingle, "SetNpadJoyAssignmentModeSingle"},
{124, &Hid::SetNpadJoyAssignmentModeDual, "SetNpadJoyAssignmentModeDual"},
{125, &Hid::MergeSingleJoyAsDualJoy, "MergeSingleJoyAsDualJoy"},
{126, &Hid::StartLrAssignmentMode, "StartLrAssignmentMode"},
{127, &Hid::StopLrAssignmentMode, "StopLrAssignmentMode"},
{128, &Hid::SetNpadHandheldActivationMode, "SetNpadHandheldActivationMode"},
{129, nullptr, "GetNpadHandheldActivationMode"},
{129, &Hid::GetNpadHandheldActivationMode, "GetNpadHandheldActivationMode"},
{130, &Hid::SwapNpadAssignment, "SwapNpadAssignment"},
{131, nullptr, "IsUnintendedHomeButtonInputProtectionEnabled"},
{132, nullptr, "EnableUnintendedHomeButtonInputProtection"},
@@ -557,10 +557,126 @@ void Hid::SetNpadJoyAssignmentModeSingleByDefault(Kernel::HLERequestContext& ctx
LOG_WARNING(Service_HID, "(STUBBED) called, npad_id={}, applet_resource_user_id={}", npad_id,
applet_resource_user_id);
auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
controller.SetNpadMode(npad_id, Controller_NPad::NPadAssignments::Single);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::SetNpadJoyAssignmentModeSingle(Kernel::HLERequestContext& ctx) {
// TODO: Check the differences between this and SetNpadJoyAssignmentModeSingleByDefault
IPC::RequestParser rp{ctx};
const auto npad_id{rp.Pop<u32>()};
const auto applet_resource_user_id{rp.Pop<u64>()};
const auto npad_joy_device_type{rp.Pop<u64>()};
LOG_WARNING(Service_HID,
"(STUBBED) called, npad_id={}, applet_resource_user_id={}, npad_joy_device_type={}",
npad_id, applet_resource_user_id, npad_joy_device_type);
auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
controller.SetNpadMode(npad_id, Controller_NPad::NPadAssignments::Single);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::SetNpadJoyAssignmentModeDual(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto npad_id{rp.Pop<u32>()};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_DEBUG(Service_HID, "called, npad_id={}, applet_resource_user_id={}", npad_id,
applet_resource_user_id);
auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
controller.SetNpadMode(npad_id, Controller_NPad::NPadAssignments::Dual);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::MergeSingleJoyAsDualJoy(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto unknown_1{rp.Pop<u32>()};
const auto unknown_2{rp.Pop<u32>()};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_WARNING(Service_HID,
"(STUBBED) called, unknown_1={}, unknown_2={}, applet_resource_user_id={}",
unknown_1, unknown_2, applet_resource_user_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::StartLrAssignmentMode(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}", applet_resource_user_id);
auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
controller.StartLRAssignmentMode();
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::StopLrAssignmentMode(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}", applet_resource_user_id);
auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
controller.StopLRAssignmentMode();
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::SetNpadHandheldActivationMode(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto applet_resource_user_id{rp.Pop<u64>()};
const auto mode{rp.Pop<u64>()};
LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}, mode={}",
applet_resource_user_id, mode);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::GetNpadHandheldActivationMode(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}",
applet_resource_user_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::SwapNpadAssignment(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto npad_1{rp.Pop<u32>()};
const auto npad_2{rp.Pop<u32>()};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}, npad_1={}, npad_2={}",
applet_resource_user_id, npad_1, npad_2);
auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
IPC::ResponseBuilder rb{ctx, 2};
if (controller.SwapNpadAssignment(npad_1, npad_2)) {
rb.Push(RESULT_SUCCESS);
} else {
LOG_ERROR(Service_HID, "Npads are not connected!");
rb.Push(ERR_NPAD_NOT_CONNECTED);
}
}
void Hid::BeginPermitVibrationSession(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto applet_resource_user_id{rp.Pop<u64>()};
@@ -635,47 +751,6 @@ void Hid::GetActualVibrationValue(Kernel::HLERequestContext& ctx) {
applet_resource->GetController<Controller_NPad>(HidController::NPad).GetLastVibration());
}
void Hid::SetNpadJoyAssignmentModeDual(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto npad_id{rp.Pop<u32>()};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_DEBUG(Service_HID, "called, npad_id={}, applet_resource_user_id={}", npad_id,
applet_resource_user_id);
auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
controller.SetNpadMode(npad_id, Controller_NPad::NPadAssignments::Dual);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::MergeSingleJoyAsDualJoy(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto unknown_1{rp.Pop<u32>()};
const auto unknown_2{rp.Pop<u32>()};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_WARNING(Service_HID,
"(STUBBED) called, unknown_1={}, unknown_2={}, applet_resource_user_id={}",
unknown_1, unknown_2, applet_resource_user_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::SetNpadHandheldActivationMode(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto applet_resource_user_id{rp.Pop<u64>()};
const auto mode{rp.Pop<u64>()};
LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}, mode={}",
applet_resource_user_id, mode);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::GetVibrationDeviceInfo(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_HID, "called");
@@ -769,49 +844,6 @@ void Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) {
rb.Push(RESULT_SUCCESS);
}
void Hid::StartLrAssignmentMode(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}", applet_resource_user_id);
auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
controller.StartLRAssignmentMode();
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::StopLrAssignmentMode(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}", applet_resource_user_id);
auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
controller.StopLRAssignmentMode();
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void Hid::SwapNpadAssignment(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto npad_1{rp.Pop<u32>()};
const auto npad_2{rp.Pop<u32>()};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}, npad_1={}, npad_2={}",
applet_resource_user_id, npad_1, npad_2);
auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
IPC::ResponseBuilder rb{ctx, 2};
if (controller.SwapNpadAssignment(npad_1, npad_2)) {
rb.Push(RESULT_SUCCESS);
} else {
LOG_ERROR(Service_HID, "Npads are not connected!");
rb.Push(ERR_NPAD_NOT_CONNECTED);
}
}
class HidDbg final : public ServiceFramework<HidDbg> {
public:
explicit HidDbg() : ServiceFramework{"hid:dbg"} {

View File

@@ -106,14 +106,19 @@ private:
void SetNpadJoyHoldType(Kernel::HLERequestContext& ctx);
void GetNpadJoyHoldType(Kernel::HLERequestContext& ctx);
void SetNpadJoyAssignmentModeSingleByDefault(Kernel::HLERequestContext& ctx);
void SetNpadJoyAssignmentModeSingle(Kernel::HLERequestContext& ctx);
void SetNpadJoyAssignmentModeDual(Kernel::HLERequestContext& ctx);
void MergeSingleJoyAsDualJoy(Kernel::HLERequestContext& ctx);
void StartLrAssignmentMode(Kernel::HLERequestContext& ctx);
void StopLrAssignmentMode(Kernel::HLERequestContext& ctx);
void SetNpadHandheldActivationMode(Kernel::HLERequestContext& ctx);
void GetNpadHandheldActivationMode(Kernel::HLERequestContext& ctx);
void SwapNpadAssignment(Kernel::HLERequestContext& ctx);
void BeginPermitVibrationSession(Kernel::HLERequestContext& ctx);
void EndPermitVibrationSession(Kernel::HLERequestContext& ctx);
void SendVibrationValue(Kernel::HLERequestContext& ctx);
void SendVibrationValues(Kernel::HLERequestContext& ctx);
void GetActualVibrationValue(Kernel::HLERequestContext& ctx);
void SetNpadJoyAssignmentModeDual(Kernel::HLERequestContext& ctx);
void MergeSingleJoyAsDualJoy(Kernel::HLERequestContext& ctx);
void SetNpadHandheldActivationMode(Kernel::HLERequestContext& ctx);
void GetVibrationDeviceInfo(Kernel::HLERequestContext& ctx);
void CreateActiveVibrationDeviceList(Kernel::HLERequestContext& ctx);
void PermitVibration(Kernel::HLERequestContext& ctx);
@@ -123,9 +128,6 @@ private:
void StopSixAxisSensor(Kernel::HLERequestContext& ctx);
void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx);
void SetPalmaBoostMode(Kernel::HLERequestContext& ctx);
void StartLrAssignmentMode(Kernel::HLERequestContext& ctx);
void StopLrAssignmentMode(Kernel::HLERequestContext& ctx);
void SwapNpadAssignment(Kernel::HLERequestContext& ctx);
std::shared_ptr<IAppletResource> applet_resource;
Core::System& system;

View File

@@ -10,6 +10,8 @@
#include "core/hle/service/lbl/lbl.h"
#include "core/hle/service/service.h"
#include "core/hle/service/sm/sm.h"
#include "core/settings.h"
#include "video_core/renderer_base.h"
namespace Service::LBL {
@@ -18,21 +20,21 @@ public:
explicit LBL() : ServiceFramework{"lbl"} {
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "SaveCurrentSetting"},
{1, nullptr, "LoadCurrentSetting"},
{2, nullptr, "SetCurrentBrightnessSetting"},
{3, nullptr, "GetCurrentBrightnessSetting"},
{4, nullptr, "ApplyCurrentBrightnessSettingToBacklight"},
{5, nullptr, "GetBrightnessSettingAppliedToBacklight"},
{6, nullptr, "SwitchBacklightOn"},
{7, nullptr, "SwitchBacklightOff"},
{8, nullptr, "GetBacklightSwitchStatus"},
{9, nullptr, "EnableDimming"},
{10, nullptr, "DisableDimming"},
{11, nullptr, "IsDimmingEnabled"},
{12, nullptr, "EnableAutoBrightnessControl"},
{13, nullptr, "DisableAutoBrightnessControl"},
{14, nullptr, "IsAutoBrightnessControlEnabled"},
{0, &LBL::SaveCurrentSetting, "SaveCurrentSetting"},
{1, &LBL::LoadCurrentSetting, "LoadCurrentSetting"},
{2, &LBL::SetCurrentBrightnessSetting, "SetCurrentBrightnessSetting"},
{3, &LBL::GetCurrentBrightnessSetting, "GetCurrentBrightnessSetting"},
{4, &LBL::ApplyCurrentBrightnessSettingToBacklight, "ApplyCurrentBrightnessSettingToBacklight"},
{5, &LBL::GetBrightnessSettingAppliedToBacklight, "GetBrightnessSettingAppliedToBacklight"},
{6, &LBL::SwitchBacklightOn, "SwitchBacklightOn"},
{7, &LBL::SwitchBacklightOff, "SwitchBacklightOff"},
{8, &LBL::GetBacklightSwitchStatus, "GetBacklightSwitchStatus"},
{9, &LBL::EnableDimming, "EnableDimming"},
{10, &LBL::DisableDimming, "DisableDimming"},
{11, &LBL::IsDimmingEnabled, "IsDimmingEnabled"},
{12, &LBL::EnableAutoBrightnessControl, "EnableAutoBrightnessControl"},
{13, &LBL::DisableAutoBrightnessControl, "DisableAutoBrightnessControl"},
{14, &LBL::IsAutoBrightnessControlEnabled, "IsAutoBrightnessControlEnabled"},
{15, nullptr, "SetAmbientLightSensorValue"},
{16, nullptr, "GetAmbientLightSensorValue"},
{17, nullptr, "SetBrightnessReflectionDelayLevel"},
@@ -42,8 +44,8 @@ public:
{21, nullptr, "SetCurrentAmbientLightSensorMapping"},
{22, nullptr, "GetCurrentAmbientLightSensorMapping"},
{23, nullptr, "IsAmbientLightSensorAvailable"},
{24, nullptr, "SetCurrentBrightnessSettingForVrMode"},
{25, nullptr, "GetCurrentBrightnessSettingForVrMode"},
{24, &LBL::SetCurrentBrightnessSettingForVrMode, "SetCurrentBrightnessSettingForVrMode"},
{25, &LBL::GetCurrentBrightnessSettingForVrMode, "GetCurrentBrightnessSettingForVrMode"},
{26, &LBL::EnableVrMode, "EnableVrMode"},
{27, &LBL::DisableVrMode, "DisableVrMode"},
{28, &LBL::IsVrModeEnabled, "IsVrModeEnabled"},
@@ -53,13 +55,209 @@ public:
RegisterHandlers(functions);
}
void LoadFromSettings() {
current_brightness = Settings::values.backlight_brightness;
current_vr_mode_brightness = Settings::values.backlight_brightness;
if (auto_brightness_enabled) {
return;
}
if (vr_mode_enabled) {
Renderer().SetCurrentBrightness(current_vr_mode_brightness);
} else {
Renderer().SetCurrentBrightness(current_brightness);
}
}
private:
f32 GetAutoBrightnessValue() const {
return 0.5f;
}
VideoCore::RendererBase& Renderer() {
return Core::System::GetInstance().Renderer();
}
void SaveCurrentSetting(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
Settings::values.backlight_brightness = current_brightness;
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void LoadCurrentSetting(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
LoadFromSettings();
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void SetCurrentBrightnessSetting(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto value = rp.PopRaw<f32>();
LOG_DEBUG(Service_LBL, "called, value={:.3f}", value);
current_brightness = std::clamp(value, 0.0f, 1.0f);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void GetCurrentBrightnessSetting(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push(current_brightness);
}
void ApplyCurrentBrightnessSettingToBacklight(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
if (!auto_brightness_enabled) {
Renderer().SetCurrentBrightness(vr_mode_enabled ? current_vr_mode_brightness
: current_brightness);
}
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void GetBrightnessSettingAppliedToBacklight(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push(Renderer().GetCurrentResultantBrightness());
}
void SwitchBacklightOn(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto fade_time = rp.PopRaw<u64>();
LOG_DEBUG(Service_LBL, "called, fade_time={:016X}", fade_time);
Renderer().SetBacklightStatus(true, fade_time);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void SwitchBacklightOff(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto fade_time = rp.PopRaw<u64>();
LOG_DEBUG(Service_LBL, "called, fade_time={:016X}", fade_time);
Renderer().SetBacklightStatus(false, fade_time);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void GetBacklightSwitchStatus(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push<u8>(Renderer().GetBacklightStatus());
}
void EnableDimming(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
dimming_enabled = true;
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void DisableDimming(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "callled");
dimming_enabled = false;
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void IsDimmingEnabled(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push<u8>(dimming_enabled);
}
void EnableAutoBrightnessControl(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
auto_brightness_enabled = true;
Renderer().SetCurrentBrightness(GetAutoBrightnessValue());
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void DisableAutoBrightnessControl(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
auto_brightness_enabled = false;
Renderer().SetCurrentBrightness(current_brightness);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void IsAutoBrightnessControlEnabled(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push<u8>(auto_brightness_enabled);
}
void SetCurrentBrightnessSettingForVrMode(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto value = rp.PopRaw<f32>();
LOG_DEBUG(Service_LBL, "called, value={:.3f}", value);
current_vr_mode_brightness = std::clamp(value, 0.0f, 1.0f);
if (vr_mode_enabled && !auto_brightness_enabled) {
Renderer().SetCurrentBrightness(value);
}
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void GetCurrentBrightnessSettingForVrMode(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push(current_vr_mode_brightness);
}
void EnableVrMode(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_LBL, "called");
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
if (!vr_mode_enabled && !auto_brightness_enabled &&
current_brightness != current_vr_mode_brightness) {
Renderer().SetCurrentBrightness(current_vr_mode_brightness);
}
vr_mode_enabled = true;
}
@@ -69,6 +267,11 @@ private:
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
if (vr_mode_enabled && !auto_brightness_enabled &&
current_brightness != current_vr_mode_brightness) {
Renderer().SetCurrentBrightness(current_brightness);
}
vr_mode_enabled = false;
}
@@ -80,9 +283,27 @@ private:
rb.Push(vr_mode_enabled);
}
bool auto_brightness_enabled = false;
bool dimming_enabled = true;
f32 current_brightness = GetAutoBrightnessValue();
f32 current_vr_mode_brightness = GetAutoBrightnessValue();
bool vr_mode_enabled = false;
};
void RequestLoadCurrentSetting(SM::ServiceManager& sm) {
if (&sm == nullptr) {
return;
}
const auto lbl = sm.GetService<LBL>("lbl");
if (lbl) {
lbl->LoadFromSettings();
}
}
void InstallInterfaces(SM::ServiceManager& sm) {
std::make_shared<LBL>()->InstallAsService(sm);
}

View File

@@ -10,6 +10,9 @@ class ServiceManager;
namespace Service::LBL {
// Requests the LBL service passed to load brightness values from Settings
void RequestLoadCurrentSetting(SM::ServiceManager& sm);
void InstallInterfaces(SM::ServiceManager& sm);
} // namespace Service::LBL

View File

@@ -26,8 +26,7 @@ constexpr ResultCode ERR_NO_APPLICATION_AREA(ErrorModule::NFP, 152);
Module::Interface::Interface(std::shared_ptr<Module> module, Core::System& system, const char* name)
: ServiceFramework(name), module(std::move(module)), system(system) {
auto& kernel = system.Kernel();
nfc_tag_load = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
"IUser:NFCTagDetected");
nfc_tag_load = Kernel::WritableEvent::CreateEventPair(kernel, "IUser:NFCTagDetected");
}
Module::Interface::~Interface() = default;
@@ -66,10 +65,9 @@ public:
RegisterHandlers(functions);
auto& kernel = system.Kernel();
deactivate_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "IUser:DeactivateEvent");
availability_change_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic, "IUser:AvailabilityChangeEvent");
deactivate_event = Kernel::WritableEvent::CreateEventPair(kernel, "IUser:DeactivateEvent");
availability_change_event =
Kernel::WritableEvent::CreateEventPair(kernel, "IUser:AvailabilityChangeEvent");
}
private:

View File

@@ -9,6 +9,7 @@
#include "core/hle/kernel/writable_event.h"
#include "core/hle/service/nifm/nifm.h"
#include "core/hle/service/service.h"
#include "core/settings.h"
namespace Service::NIFM {
@@ -69,10 +70,8 @@ public:
RegisterHandlers(functions);
auto& kernel = system.Kernel();
event1 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
"IRequest:Event1");
event2 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
"IRequest:Event2");
event1 = Kernel::WritableEvent::CreateEventPair(kernel, "IRequest:Event1");
event2 = Kernel::WritableEvent::CreateEventPair(kernel, "IRequest:Event2");
}
private:
@@ -88,7 +87,12 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.PushEnum(RequestState::Connected);
if (Settings::values.bcat_backend == "none") {
rb.PushEnum(RequestState::NotSubmitted);
} else {
rb.PushEnum(RequestState::Connected);
}
}
void GetResult(Kernel::HLERequestContext& ctx) {
@@ -196,14 +200,22 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push<u8>(1);
if (Settings::values.bcat_backend == "none") {
rb.Push<u8>(0);
} else {
rb.Push<u8>(1);
}
}
void IsAnyInternetRequestAccepted(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_NIFM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push<u8>(1);
if (Settings::values.bcat_backend == "none") {
rb.Push<u8>(0);
} else {
rb.Push<u8>(1);
}
}
Core::System& system;
};

View File

@@ -141,8 +141,7 @@ public:
auto& kernel = system.Kernel();
finished_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Automatic,
"IEnsureNetworkClockAvailabilityService:FinishEvent");
kernel, "IEnsureNetworkClockAvailabilityService:FinishEvent");
}
private:

View File

@@ -22,6 +22,18 @@ u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, const std::
switch (static_cast<IoctlCommand>(command.raw)) {
case IoctlCommand::IocSetNVMAPfdCommand:
return SetNVMAPfd(input, output);
case IoctlCommand::IocSubmit:
return Submit(input, output);
case IoctlCommand::IocGetSyncpoint:
return GetSyncpoint(input, output);
case IoctlCommand::IocGetWaitbase:
return GetWaitbase(input, output);
case IoctlCommand::IocMapBuffer:
return MapBuffer(input, output);
case IoctlCommand::IocMapBufferEx:
return MapBufferEx(input, output);
case IoctlCommand::IocUnmapBufferEx:
return UnmapBufferEx(input, output);
}
UNIMPLEMENTED_MSG("Unimplemented ioctl");
@@ -30,11 +42,67 @@ u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, const std::
u32 nvhost_nvdec::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlSetNvmapFD params{};
std::memcpy(&params, input.data(), input.size());
std::memcpy(&params, input.data(), sizeof(IoctlSetNvmapFD));
LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
nvmap_fd = params.nvmap_fd;
return 0;
}
u32 nvhost_nvdec::Submit(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlSubmit params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmit));
LOG_WARNING(Service_NVDRV, "(STUBBED) called");
std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
return 0;
}
u32 nvhost_nvdec::GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlGetSyncpoint params{};
std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown);
params.value = 0; // Seems to be hard coded at 0
std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint));
return 0;
}
u32 nvhost_nvdec::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlGetWaitbase params{};
std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase));
LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown);
params.value = 0; // Seems to be hard coded at 0
std::memcpy(output.data(), &params, sizeof(IoctlGetWaitbase));
return 0;
}
u32 nvhost_nvdec::MapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlMapBuffer params{};
std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2,
params.address_1);
params.address_1 = 0;
params.address_2 = 0;
std::memcpy(output.data(), &params, sizeof(IoctlMapBuffer));
return 0;
}
u32 nvhost_nvdec::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlMapBufferEx params{};
std::memcpy(&params, input.data(), sizeof(IoctlMapBufferEx));
LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2,
params.address_1);
params.address_1 = 0;
params.address_2 = 0;
std::memcpy(output.data(), &params, sizeof(IoctlMapBufferEx));
return 0;
}
u32 nvhost_nvdec::UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlUnmapBufferEx params{};
std::memcpy(&params, input.data(), sizeof(IoctlUnmapBufferEx));
LOG_WARNING(Service_NVDRV, "(STUBBED) called");
std::memcpy(output.data(), &params, sizeof(IoctlUnmapBufferEx));
return 0;
}
} // namespace Service::Nvidia::Devices

View File

@@ -23,16 +23,66 @@ public:
private:
enum class IoctlCommand : u32_le {
IocSetNVMAPfdCommand = 0x40044801,
IocSubmit = 0xC0400001,
IocGetSyncpoint = 0xC0080002,
IocGetWaitbase = 0xC0080003,
IocMapBuffer = 0xC01C0009,
IocMapBufferEx = 0xC0A40009,
IocUnmapBufferEx = 0xC0A4000A,
};
struct IoctlSetNvmapFD {
u32_le nvmap_fd;
};
static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size");
static_assert(sizeof(IoctlSetNvmapFD) == 0x4, "IoctlSetNvmapFD is incorrect size");
struct IoctlSubmit {
INSERT_PADDING_BYTES(0x40); // TODO(DarkLordZach): RE this structure
};
static_assert(sizeof(IoctlSubmit) == 0x40, "IoctlSubmit has incorrect size");
struct IoctlGetSyncpoint {
u32 unknown; // seems to be ignored? Nintendo added this
u32 value;
};
static_assert(sizeof(IoctlGetSyncpoint) == 0x08, "IoctlGetSyncpoint has incorrect size");
struct IoctlGetWaitbase {
u32 unknown; // seems to be ignored? Nintendo added this
u32 value;
};
static_assert(sizeof(IoctlGetWaitbase) == 0x08, "IoctlGetWaitbase has incorrect size");
struct IoctlMapBuffer {
u32 unknown;
u32 address_1;
u32 address_2;
INSERT_PADDING_BYTES(0x10); // TODO(DarkLordZach): RE this structure
};
static_assert(sizeof(IoctlMapBuffer) == 0x1C, "IoctlMapBuffer is incorrect size");
struct IoctlMapBufferEx {
u32 unknown;
u32 address_1;
u32 address_2;
INSERT_PADDING_BYTES(0x98); // TODO(DarkLordZach): RE this structure
};
static_assert(sizeof(IoctlMapBufferEx) == 0xA4, "IoctlMapBufferEx has incorrect size");
struct IoctlUnmapBufferEx {
INSERT_PADDING_BYTES(0xA4); // TODO(DarkLordZach): RE this structure
};
static_assert(sizeof(IoctlUnmapBufferEx) == 0xA4, "IoctlUnmapBufferEx has incorrect size");
u32_le nvmap_fd{};
u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output);
u32 Submit(const std::vector<u8>& input, std::vector<u8>& output);
u32 GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output);
u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
u32 MapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
u32 MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output);
u32 UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output);
};
} // namespace Service::Nvidia::Devices

View File

@@ -22,6 +22,18 @@ u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, const std::ve
switch (static_cast<IoctlCommand>(command.raw)) {
case IoctlCommand::IocSetNVMAPfdCommand:
return SetNVMAPfd(input, output);
case IoctlCommand::IocSubmit:
return Submit(input, output);
case IoctlCommand::IocGetSyncpoint:
return GetSyncpoint(input, output);
case IoctlCommand::IocGetWaitbase:
return GetWaitbase(input, output);
case IoctlCommand::IocMapBuffer:
return MapBuffer(input, output);
case IoctlCommand::IocMapBufferEx:
return MapBuffer(input, output);
case IoctlCommand::IocUnmapBufferEx:
return UnmapBufferEx(input, output);
}
UNIMPLEMENTED_MSG("Unimplemented ioctl");
@@ -30,11 +42,67 @@ u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, const std::ve
u32 nvhost_vic::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlSetNvmapFD params{};
std::memcpy(&params, input.data(), input.size());
std::memcpy(&params, input.data(), sizeof(IoctlSetNvmapFD));
LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
nvmap_fd = params.nvmap_fd;
return 0;
}
u32 nvhost_vic::Submit(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlSubmit params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmit));
LOG_WARNING(Service_NVDRV, "(STUBBED) called");
std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
return 0;
}
u32 nvhost_vic::GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlGetSyncpoint params{};
std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown);
params.value = 0; // Seems to be hard coded at 0
std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint));
return 0;
}
u32 nvhost_vic::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlGetWaitbase params{};
std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase));
LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown);
params.value = 0; // Seems to be hard coded at 0
std::memcpy(output.data(), &params, sizeof(IoctlGetWaitbase));
return 0;
}
u32 nvhost_vic::MapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlMapBuffer params{};
std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2,
params.address_1);
params.address_1 = 0;
params.address_2 = 0;
std::memcpy(output.data(), &params, sizeof(IoctlMapBuffer));
return 0;
}
u32 nvhost_vic::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlMapBufferEx params{};
std::memcpy(&params, input.data(), sizeof(IoctlMapBufferEx));
LOG_WARNING(Service_NVDRV, "(STUBBED) called with address={:08X}{:08X}", params.address_2,
params.address_1);
params.address_1 = 0;
params.address_2 = 0;
std::memcpy(output.data(), &params, sizeof(IoctlMapBufferEx));
return 0;
}
u32 nvhost_vic::UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlUnmapBufferEx params{};
std::memcpy(&params, input.data(), sizeof(IoctlUnmapBufferEx));
LOG_WARNING(Service_NVDRV, "(STUBBED) called");
std::memcpy(output.data(), &params, sizeof(IoctlUnmapBufferEx));
return 0;
}
} // namespace Service::Nvidia::Devices

View File

@@ -23,6 +23,12 @@ public:
private:
enum class IoctlCommand : u32_le {
IocSetNVMAPfdCommand = 0x40044801,
IocSubmit = 0xC0400001,
IocGetSyncpoint = 0xC0080002,
IocGetWaitbase = 0xC0080003,
IocMapBuffer = 0xC01C0009,
IocMapBufferEx = 0xC03C0009,
IocUnmapBufferEx = 0xC03C000A,
};
struct IoctlSetNvmapFD {
@@ -30,9 +36,53 @@ private:
};
static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size");
struct IoctlSubmit {
INSERT_PADDING_BYTES(0x40); // TODO(DarkLordZach): RE this structure
};
static_assert(sizeof(IoctlSubmit) == 0x40, "IoctlSubmit is incorrect size");
struct IoctlGetSyncpoint {
u32 unknown; // seems to be ignored? Nintendo added this
u32 value;
};
static_assert(sizeof(IoctlGetSyncpoint) == 0x8, "IoctlGetSyncpoint is incorrect size");
struct IoctlGetWaitbase {
u32 unknown; // seems to be ignored? Nintendo added this
u32 value;
};
static_assert(sizeof(IoctlGetWaitbase) == 0x8, "IoctlGetWaitbase is incorrect size");
struct IoctlMapBuffer {
u32 unknown;
u32 address_1;
u32 address_2;
INSERT_PADDING_BYTES(0x10); // TODO(DarkLordZach): RE this structure
};
static_assert(sizeof(IoctlMapBuffer) == 0x1C, "IoctlMapBuffer is incorrect size");
struct IoctlMapBufferEx {
u32 unknown;
u32 address_1;
u32 address_2;
INSERT_PADDING_BYTES(0x30); // TODO(DarkLordZach): RE this structure
};
static_assert(sizeof(IoctlMapBufferEx) == 0x3C, "IoctlMapBufferEx is incorrect size");
struct IoctlUnmapBufferEx {
INSERT_PADDING_BYTES(0x3C); // TODO(DarkLordZach): RE this structure
};
static_assert(sizeof(IoctlUnmapBufferEx) == 0x3C, "IoctlUnmapBufferEx is incorrect size");
u32_le nvmap_fd{};
u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output);
u32 Submit(const std::vector<u8>& input, std::vector<u8>& output);
u32 GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output);
u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
u32 MapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
u32 MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output);
u32 UnmapBufferEx(const std::vector<u8>& input, std::vector<u8>& output);
};
} // namespace Service::Nvidia::Devices

View File

@@ -40,8 +40,7 @@ Module::Module(Core::System& system) {
auto& kernel = system.Kernel();
for (u32 i = 0; i < MaxNvEvents; i++) {
std::string event_label = fmt::format("NVDRV::NvEvent_{}", i);
events_interface.events[i] =
Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, event_label);
events_interface.events[i] = Kernel::WritableEvent::CreateEventPair(kernel, event_label);
events_interface.status[i] = EventState::Free;
events_interface.registered[i] = false;
}

View File

@@ -16,8 +16,7 @@ namespace Service::NVFlinger {
BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id, u64 layer_id)
: id(id), layer_id(layer_id) {
buffer_wait_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
"BufferQueue NativeHandle");
buffer_wait_event = Kernel::WritableEvent::CreateEventPair(kernel, "BufferQueue NativeHandle");
}
BufferQueue::~BufferQueue() = default;

View File

@@ -17,8 +17,8 @@ namespace Service::VI {
Display::Display(u64 id, std::string name, Core::System& system) : id{id}, name{std::move(name)} {
auto& kernel = system.Kernel();
vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
fmt::format("Display VSync Event {}", id));
vsync_event =
Kernel::WritableEvent::CreateEventPair(kernel, fmt::format("Display VSync Event {}", id));
}
Display::~Display() = default;

View File

@@ -6,6 +6,8 @@
#include "core/core.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hle/service/hid/hid.h"
#include "core/hle/service/lbl/lbl.h"
#include "core/hle/service/sm/sm.h"
#include "core/settings.h"
#include "video_core/renderer_base.h"
@@ -70,6 +72,7 @@ void Apply() {
auto& system_instance = Core::System::GetInstance();
if (system_instance.IsPoweredOn()) {
system_instance.Renderer().RefreshBaseSettings();
Service::LBL::RequestLoadCurrentSetting(system_instance.ServiceManager());
}
Service::HID::ReloadInputDevices();

View File

@@ -428,6 +428,8 @@ struct Values {
float bg_green;
float bg_blue;
float backlight_brightness = 0.5f;
std::string log_filter;
bool use_dev_keys;

View File

@@ -6,6 +6,7 @@ add_library(video_core STATIC
dma_pusher.h
debug_utils/debug_utils.cpp
debug_utils/debug_utils.h
engines/const_buffer_engine_interface.h
engines/const_buffer_info.h
engines/engine_upload.cpp
engines/engine_upload.h
@@ -35,6 +36,8 @@ add_library(video_core STATIC
memory_manager.h
morton.cpp
morton.h
rasterizer_accelerated.cpp
rasterizer_accelerated.h
rasterizer_cache.cpp
rasterizer_cache.h
rasterizer_interface.h
@@ -107,10 +110,12 @@ add_library(video_core STATIC
shader/decode/other.cpp
shader/ast.cpp
shader/ast.h
shader/control_flow.cpp
shader/control_flow.h
shader/compiler_settings.cpp
shader/compiler_settings.h
shader/const_buffer_locker.cpp
shader/const_buffer_locker.h
shader/control_flow.cpp
shader/control_flow.h
shader/decode.cpp
shader/expr.cpp
shader/expr.h

View File

@@ -30,7 +30,7 @@ public:
using BufferInfo = std::pair<const TBufferType*, u64>;
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
bool is_written = false) {
bool is_written = false, bool use_fast_cbuf = false) {
std::lock_guard lock{mutex};
auto& memory_manager = system.GPU().MemoryManager();
@@ -43,9 +43,13 @@ public:
// Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games.
constexpr std::size_t max_stream_size = 0x800;
if (size < max_stream_size) {
if (use_fast_cbuf || size < max_stream_size) {
if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) {
return StreamBufferUpload(host_ptr, size, alignment);
if (use_fast_cbuf) {
return ConstBufferUpload(host_ptr, size);
} else {
return StreamBufferUpload(host_ptr, size, alignment);
}
}
}
@@ -152,6 +156,10 @@ protected:
virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) = 0;
virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
return {};
}
/// Register an object into the cache
void Register(const MapInterval& new_map, bool inherit_written = false) {
const CacheAddr cache_ptr = new_map->GetStart();

View File

@@ -0,0 +1,119 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <type_traits>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/textures/texture.h"
namespace Tegra::Engines {
enum class ShaderType : u32 {
Vertex = 0,
TesselationControl = 1,
TesselationEval = 2,
Geometry = 3,
Fragment = 4,
Compute = 5,
};
struct SamplerDescriptor {
union {
BitField<0, 20, Tegra::Shader::TextureType> texture_type;
BitField<20, 1, u32> is_array;
BitField<21, 1, u32> is_buffer;
BitField<22, 1, u32> is_shadow;
u32 raw{};
};
bool operator==(const SamplerDescriptor& rhs) const noexcept {
return raw == rhs.raw;
}
bool operator!=(const SamplerDescriptor& rhs) const noexcept {
return !operator==(rhs);
}
static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) {
SamplerDescriptor result;
switch (tic_texture_type) {
case Tegra::Texture::TextureType::Texture1D:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
result.is_array.Assign(0);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
return result;
case Tegra::Texture::TextureType::Texture2D:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
result.is_array.Assign(0);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
return result;
case Tegra::Texture::TextureType::Texture3D:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D);
result.is_array.Assign(0);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
return result;
case Tegra::Texture::TextureType::TextureCubemap:
result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube);
result.is_array.Assign(0);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
return result;
case Tegra::Texture::TextureType::Texture1DArray:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
result.is_array.Assign(1);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
return result;
case Tegra::Texture::TextureType::Texture2DArray:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
result.is_array.Assign(1);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
return result;
case Tegra::Texture::TextureType::Texture1DBuffer:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
result.is_array.Assign(0);
result.is_buffer.Assign(1);
result.is_shadow.Assign(0);
return result;
case Tegra::Texture::TextureType::Texture2DNoMipmap:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
result.is_array.Assign(0);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
return result;
case Tegra::Texture::TextureType::TextureCubeArray:
result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube);
result.is_array.Assign(1);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
return result;
default:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
result.is_array.Assign(0);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
return result;
}
}
};
static_assert(std::is_trivially_copyable_v<SamplerDescriptor>);
class ConstBufferEngineInterface {
public:
virtual ~ConstBufferEngineInterface() = default;
virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0;
virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const = 0;
virtual u32 GetBoundBuffer() const = 0;
};
} // namespace Tegra::Engines

View File

@@ -28,6 +28,13 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
}
}
std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {
const u32 line_a = src_2 - src_1;
const u32 line_b = dst_2 - dst_1;
const u32 excess = std::max<s32>(0, line_a - src_line + src_1);
return {line_b - (excess * line_b) / line_a, excess};
}
void Fermi2D::HandleSurfaceCopy() {
LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}",
static_cast<u32>(regs.operation));
@@ -47,10 +54,27 @@ void Fermi2D::HandleSurfaceCopy() {
src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width);
src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height);
}
u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width;
u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height;
const auto [new_dst_w, src_excess_x] =
DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width);
const auto [new_dst_h, src_excess_y] =
DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height);
dst_blit_x2 = new_dst_w + regs.blit_dst_x;
src_blit_x2 = src_blit_x2 - src_excess_x;
dst_blit_y2 = new_dst_h + regs.blit_dst_y;
src_blit_y2 = src_blit_y2 - src_excess_y;
const auto [new_src_w, dst_excess_x] =
DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width);
const auto [new_src_h, dst_excess_y] =
DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height);
src_blit_x2 = new_src_w + src_blit_x1;
dst_blit_x2 = dst_blit_x2 - dst_excess_x;
src_blit_y2 = new_src_h + src_blit_y1;
dst_blit_y2 = dst_blit_y2 - dst_excess_y;
const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
regs.blit_dst_x + regs.blit_dst_width,
regs.blit_dst_y + regs.blit_dst_height};
const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2,
dst_blit_y2};
Config copy_config;
copy_config.operation = regs.operation;
copy_config.filter = regs.blit_control.filter;

View File

@@ -50,7 +50,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
}
}
Tegra::Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {
Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {
const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value();
ASSERT(cbuf_mask[regs.tex_cb_index]);
@@ -61,22 +61,38 @@ Tegra::Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) co
ASSERT(address < texinfo.Address() + texinfo.size);
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)};
return GetTextureInfo(tex_handle, offset);
return GetTextureInfo(tex_handle);
}
Texture::FullTextureInfo KeplerCompute::GetTextureInfo(const Texture::TextureHandle tex_handle,
std::size_t offset) const {
return Texture::FullTextureInfo{static_cast<u32>(offset), GetTICEntry(tex_handle.tic_id),
GetTSCEntry(tex_handle.tsc_id)};
Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const {
return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
}
u32 KeplerCompute::AccessConstBuffer32(u64 const_buffer, u64 offset) const {
u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
ASSERT(stage == ShaderType::Compute);
const auto& buffer = launch_description.const_buffer_config[const_buffer];
u32 result;
std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32));
return result;
}
SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const {
return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
}
SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const {
ASSERT(stage == ShaderType::Compute);
const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value());
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
return result;
}
void KeplerCompute::ProcessLaunch() {
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,

View File

@@ -10,6 +10,7 @@
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h"
#include "video_core/textures/texture.h"
@@ -37,7 +38,7 @@ namespace Tegra::Engines {
#define KEPLER_COMPUTE_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
class KeplerCompute final {
class KeplerCompute final : public ConstBufferEngineInterface {
public:
explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager);
@@ -195,13 +196,21 @@ public:
/// Write the value to the register identified by method.
void CallMethod(const GPU::MethodCall& method_call);
Tegra::Texture::FullTextureInfo GetTexture(std::size_t offset) const;
Texture::FullTextureInfo GetTexture(std::size_t offset) const;
/// Given a Texture Handle, returns the TSC and TIC entries.
Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle,
std::size_t offset) const;
/// Given a texture handle, returns the TSC and TIC entries.
Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
u32 AccessConstBuffer32(u64 const_buffer, u64 offset) const;
u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const override;
u32 GetBoundBuffer() const override {
return regs.tex_cb_index;
}
private:
Core::System& system;

View File

@@ -98,11 +98,10 @@ void Maxwell3D::InitializeRegisterDefaults() {
mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
}
#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name))
#define DIRTY_REGS_POS(field_name) static_cast<u8>(offsetof(Maxwell3D::DirtyRegs, field_name))
void Maxwell3D::InitDirtySettings() {
const auto set_block = [this](const std::size_t start, const std::size_t range,
const u8 position) {
const auto set_block = [this](std::size_t start, std::size_t range, u8 position) {
const auto start_itr = dirty_pointers.begin() + start;
const auto end_itr = start_itr + range;
std::fill(start_itr, end_itr, position);
@@ -113,10 +112,10 @@ void Maxwell3D::InitDirtySettings() {
constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt);
constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8;
u32 rt_dirty_reg = DIRTY_REGS_POS(render_target);
u8 rt_dirty_reg = DIRTY_REGS_POS(render_target);
for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) {
set_block(rt_reg, registers_per_rt, rt_dirty_reg);
rt_dirty_reg++;
++rt_dirty_reg;
}
constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer);
dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag;
@@ -130,35 +129,35 @@ void Maxwell3D::InitDirtySettings() {
constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array);
constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32);
constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays;
u32 va_reg = DIRTY_REGS_POS(vertex_array);
u32 vi_reg = DIRTY_REGS_POS(vertex_instance);
u8 va_dirty_reg = DIRTY_REGS_POS(vertex_array);
u8 vi_dirty_reg = DIRTY_REGS_POS(vertex_instance);
for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end;
vertex_reg += vertex_array_size) {
set_block(vertex_reg, 3, va_reg);
set_block(vertex_reg, 3, va_dirty_reg);
// The divisor concerns vertex array instances
dirty_pointers[vertex_reg + 3] = vi_reg;
va_reg++;
vi_reg++;
dirty_pointers[static_cast<std::size_t>(vertex_reg) + 3] = vi_dirty_reg;
++va_dirty_reg;
++vi_dirty_reg;
}
constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit);
constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32);
constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays;
va_reg = DIRTY_REGS_POS(vertex_array);
va_dirty_reg = DIRTY_REGS_POS(vertex_array);
for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end;
vertex_reg += vertex_limit_size) {
set_block(vertex_reg, vertex_limit_size, va_reg);
va_reg++;
set_block(vertex_reg, vertex_limit_size, va_dirty_reg);
va_dirty_reg++;
}
constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays);
constexpr u32 vertex_instance_size =
sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32);
constexpr u32 vertex_instance_end =
vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays;
vi_reg = DIRTY_REGS_POS(vertex_instance);
vi_dirty_reg = DIRTY_REGS_POS(vertex_instance);
for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end;
vertex_reg += vertex_instance_size) {
set_block(vertex_reg, vertex_instance_size, vi_reg);
vi_reg++;
set_block(vertex_reg, vertex_instance_size, vi_dirty_reg);
vi_dirty_reg++;
}
set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(),
DIRTY_REGS_POS(vertex_attrib_format));
@@ -172,7 +171,7 @@ void Maxwell3D::InitDirtySettings() {
// State
// Viewport
constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
constexpr u8 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports);
constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32);
set_block(viewport_start, viewport_size, viewport_dirty_reg);
@@ -199,7 +198,7 @@ void Maxwell3D::InitDirtySettings() {
set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart));
// Depth Test
constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
constexpr u8 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg;
@@ -224,12 +223,12 @@ void Maxwell3D::InitDirtySettings() {
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg;
// Color Mask
constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
constexpr u8 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg;
set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32),
color_mask_dirty_reg);
// Blend State
constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
constexpr u8 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32),
blend_state_dirty_reg);
dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg;
@@ -238,12 +237,12 @@ void Maxwell3D::InitDirtySettings() {
blend_state_dirty_reg);
// Scissor State
constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
constexpr u8 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32),
scissor_test_dirty_reg);
// Polygon Offset
constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
constexpr u8 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg;
@@ -252,7 +251,7 @@ void Maxwell3D::InitDirtySettings() {
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
// Depth bounds
constexpr u32 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values);
constexpr u8 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values);
dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[0])] = depth_bounds_values_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[1])] = depth_bounds_values_dirty_reg;
}
@@ -761,61 +760,8 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
return tsc_entry;
}
std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) const {
std::vector<Texture::FullTextureInfo> textures;
auto& fragment_shader = state.shader_stages[static_cast<std::size_t>(stage)];
auto& tex_info_buffer = fragment_shader.const_buffers[regs.tex_cb_index];
ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
GPUVAddr tex_info_buffer_end = tex_info_buffer.address + tex_info_buffer.size;
// Offset into the texture constbuffer where the texture info begins.
static constexpr std::size_t TextureInfoOffset = 0x20;
for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(current_texture)};
Texture::FullTextureInfo tex_info{};
// TODO(Subv): Use the shader to determine which textures are actually accessed.
tex_info.index =
static_cast<u32>(current_texture - tex_info_buffer.address - TextureInfoOffset) /
sizeof(Texture::TextureHandle);
// Load the TIC data.
auto tic_entry = GetTICEntry(tex_handle.tic_id);
// TODO(Subv): Workaround for BitField's move constructor being deleted.
std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
// Load the TSC data
auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
// TODO(Subv): Workaround for BitField's move constructor being deleted.
std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
textures.push_back(tex_info);
}
return textures;
}
Texture::FullTextureInfo Maxwell3D::GetTextureInfo(const Texture::TextureHandle tex_handle,
std::size_t offset) const {
Texture::FullTextureInfo tex_info{};
tex_info.index = static_cast<u32>(offset);
// Load the TIC data.
auto tic_entry = GetTICEntry(tex_handle.tic_id);
// TODO(Subv): Workaround for BitField's move constructor being deleted.
std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
// Load the TSC data
auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
// TODO(Subv): Workaround for BitField's move constructor being deleted.
std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
return tex_info;
Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const {
return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
}
Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
@@ -831,7 +777,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
return GetTextureInfo(tex_handle, offset);
return GetTextureInfo(tex_handle);
}
u32 Maxwell3D::GetRegisterValue(u32 method) const {
@@ -847,7 +793,8 @@ void Maxwell3D::ProcessClearBuffers() {
rasterizer.Clear();
}
u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const {
u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
ASSERT(stage != ShaderType::Compute);
const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
const auto& buffer = shader_stage.const_buffers[const_buffer];
u32 result;
@@ -855,4 +802,22 @@ u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u6
return result;
}
SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
}
SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const {
ASSERT(stage != ShaderType::Compute);
const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
const auto& tex_info_buffer = shader.const_buffers[const_buffer];
const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value());
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
return result;
}
} // namespace Tegra::Engines

View File

@@ -15,6 +15,7 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/math_util.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/const_buffer_info.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h"
@@ -44,7 +45,7 @@ namespace Tegra::Engines {
#define MAXWELL3D_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
class Maxwell3D final {
class Maxwell3D final : public ConstBufferEngineInterface {
public:
explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager);
@@ -1165,6 +1166,8 @@ public:
struct DirtyRegs {
static constexpr std::size_t NUM_REGS = 256;
static_assert(NUM_REGS - 1 <= std::numeric_limits<u8>::max());
union {
struct {
bool null_dirty;
@@ -1247,17 +1250,22 @@ public:
void FlushMMEInlineDraw();
/// Given a Texture Handle, returns the TSC and TIC entries.
Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle,
std::size_t offset) const;
/// Returns a list of enabled textures for the specified shader stage.
std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
/// Given a texture handle, returns the TSC and TIC entries.
Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
/// Returns the texture information for a specific texture in a specific shader stage.
Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
u32 AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const;
u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const override;
u32 GetBoundBuffer() const override {
return regs.tex_cb_index;
}
/// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
/// we've seen used.

View File

@@ -574,7 +574,7 @@ enum class ShuffleOperation : u64 {
};
union Instruction {
Instruction& operator=(const Instruction& instr) {
constexpr Instruction& operator=(const Instruction& instr) {
value = instr.value;
return *this;
}
@@ -1237,6 +1237,32 @@ union Instruction {
}
} tld4;
union {
BitField<35, 1, u64> ndv_flag;
BitField<49, 1, u64> nodep_flag;
BitField<50, 1, u64> dc_flag;
BitField<33, 2, u64> info;
BitField<37, 2, u64> component;
bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::NDV:
return ndv_flag != 0;
case TextureMiscMode::NODEP:
return nodep_flag != 0;
case TextureMiscMode::DC:
return dc_flag != 0;
case TextureMiscMode::AOFFI:
return info == 1;
case TextureMiscMode::PTP:
return info == 2;
default:
break;
}
return false;
}
} tld4_b;
union {
BitField<49, 1, u64> nodep_flag;
BitField<50, 1, u64> dc_flag;
@@ -1590,7 +1616,8 @@ public:
TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
TLD, // Texture Load
TLDS, // Texture Load with scalar/non-vec4 source/destinations
TLD4, // Texture Load 4
TLD4, // Texture Gather 4
TLD4_B, // Texture Gather 4 Bindless
TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations
TMML_B, // Texture Mip Map Level
TMML, // Texture Mip Map Level
@@ -1760,22 +1787,22 @@ public:
class Matcher {
public:
Matcher(const char* const name, u16 mask, u16 expected, OpCode::Id id, OpCode::Type type)
constexpr Matcher(const char* const name, u16 mask, u16 expected, Id id, Type type)
: name{name}, mask{mask}, expected{expected}, id{id}, type{type} {}
const char* GetName() const {
constexpr const char* GetName() const {
return name;
}
u16 GetMask() const {
constexpr u16 GetMask() const {
return mask;
}
Id GetId() const {
constexpr Id GetId() const {
return id;
}
Type GetType() const {
constexpr Type GetType() const {
return type;
}
@@ -1784,7 +1811,7 @@ public:
* @param instruction The instruction to test
* @returns true if the given instruction matches.
*/
bool Matches(u16 instruction) const {
constexpr bool Matches(u16 instruction) const {
return (instruction & mask) == expected;
}
@@ -1818,7 +1845,7 @@ private:
* A '0' in a bitstring indicates that a zero must be present at that bit position.
* A '1' in a bitstring indicates that a one must be present at that bit position.
*/
static auto GetMaskAndExpect(const char* const bitstring) {
static constexpr auto GetMaskAndExpect(const char* const bitstring) {
u16 mask = 0, expect = 0;
for (std::size_t i = 0; i < opcode_bitsize; i++) {
const std::size_t bit_position = opcode_bitsize - i - 1;
@@ -1835,15 +1862,15 @@ private:
break;
}
}
return std::make_tuple(mask, expect);
return std::make_pair(mask, expect);
}
public:
/// Creates a matcher that can match and parse instructions based on bitstring.
static auto GetMatcher(const char* const bitstring, OpCode::Id op, OpCode::Type type,
const char* const name) {
const auto mask_expect = GetMaskAndExpect(bitstring);
return Matcher(name, std::get<0>(mask_expect), std::get<1>(mask_expect), op, type);
static constexpr auto GetMatcher(const char* const bitstring, Id op, Type type,
const char* const name) {
const auto [mask, expected] = GetMaskAndExpect(bitstring);
return Matcher(name, mask, expected, op, type);
}
};
@@ -1881,6 +1908,7 @@ private:
INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),

View File

@@ -112,6 +112,7 @@ static constexpr ConversionArray morton_to_linear_fns = {
MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_6X5>,
MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>,
MortonCopy<true, PixelFormat::E5B9G9R9F>,
MortonCopy<true, PixelFormat::Z32F>,
MortonCopy<true, PixelFormat::Z16>,
MortonCopy<true, PixelFormat::Z24S8>,
@@ -192,6 +193,7 @@ static constexpr ConversionArray linear_to_morton_fns = {
nullptr,
nullptr,
nullptr,
MortonCopy<false, PixelFormat::E5B9G9R9F>,
MortonCopy<false, PixelFormat::Z32F>,
MortonCopy<false, PixelFormat::Z16>,
MortonCopy<false, PixelFormat::Z24S8>,

View File

@@ -0,0 +1,63 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <mutex>
#include <boost/icl/interval_map.hpp>
#include "common/assert.h"
#include "common/common_types.h"
#include "core/memory.h"
#include "video_core/rasterizer_accelerated.h"
namespace VideoCore {
namespace {
template <typename Map, typename Interval>
constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
return boost::make_iterator_range(map.equal_range(interval));
}
} // Anonymous namespace
RasterizerAccelerated::RasterizerAccelerated() = default;
RasterizerAccelerated::~RasterizerAccelerated() = default;
void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
std::lock_guard lock{pages_mutex};
const u64 page_start{addr >> Memory::PAGE_BITS};
const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
// Interval maps will erase segments if count reaches 0, so if delta is negative we have to
// subtract after iterating
const auto pages_interval = CachedPageMap::interval_type::right_open(page_start, page_end);
if (delta > 0) {
cached_pages.add({pages_interval, delta});
}
for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) {
const auto interval = pair.first & pages_interval;
const int count = pair.second;
const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS;
const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS;
const u64 interval_size = interval_end_addr - interval_start_addr;
if (delta > 0 && count == delta) {
Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true);
} else if (delta < 0 && count == -delta) {
Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false);
} else {
ASSERT(count >= 0);
}
}
if (delta < 0) {
cached_pages.add({pages_interval, delta});
}
}
} // namespace VideoCore

View File

@@ -0,0 +1,31 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <mutex>
#include <boost/icl/interval_map.hpp>
#include "common/common_types.h"
#include "video_core/rasterizer_interface.h"
namespace VideoCore {
/// Implements the shared part in GPU accelerated rasterizers in RasterizerInterface.
class RasterizerAccelerated : public RasterizerInterface {
public:
explicit RasterizerAccelerated();
~RasterizerAccelerated() override;
void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
private:
using CachedPageMap = boost::icl::interval_map<u64, int>;
CachedPageMap cached_pages;
std::mutex pages_mutex;
};
} // namespace VideoCore

View File

@@ -40,4 +40,35 @@ void RendererBase::RequestScreenshot(void* data, std::function<void()> callback,
renderer_settings.screenshot_requested = true;
}
f32 RendererBase::GetCurrentResultantBrightness() const {
return renderer_settings.current_brightness / 2.0f;
}
void RendererBase::SetBacklightStatus(bool enabled, u64 fade_transition_time) {
if (fade_transition_time == 0) {
// Needed to ensure the renderer recognizes that a change must occur.
fade_transition_time = 1;
}
if (enabled && renderer_settings.current_brightness == 0) {
renderer_settings.current_brightness = current_brightness_backup;
renderer_settings.backlight_fade_time = fade_transition_time;
} else if (!enabled && renderer_settings.current_brightness != 0) {
current_brightness_backup = renderer_settings.current_brightness;
renderer_settings.current_brightness = 0;
renderer_settings.backlight_fade_time = fade_transition_time;
}
}
bool RendererBase::GetBacklightStatus() const {
return renderer_settings.current_brightness != 0;
}
void RendererBase::SetCurrentBrightness(f32 value) {
if (value != renderer_settings.current_brightness) {
renderer_settings.current_brightness = value * 2.0f;
renderer_settings.backlight_fade_time = 1;
}
}
} // namespace VideoCore

View File

@@ -28,6 +28,10 @@ struct RendererSettings {
void* screenshot_bits;
std::function<void()> screenshot_complete_callback;
Layout::FramebufferLayout screenshot_framebuffer_layout;
// Backlight & Brightness
std::atomic<f32> current_brightness{1.f};
std::atomic<u64> backlight_fade_time{0};
};
class RendererBase : NonCopyable {
@@ -86,6 +90,17 @@ public:
void RequestScreenshot(void* data, std::function<void()> callback,
const Layout::FramebufferLayout& layout);
// Gets the current brightness, even if it has been changed from the set value. Most of the time
// for yuzu this will simply match what was returned, but implementations are free to change the
// value in settings.
f32 GetCurrentResultantBrightness() const;
void SetBacklightStatus(bool enabled, u64 fade_transition_time);
bool GetBacklightStatus() const;
void SetCurrentBrightness(f32 value);
protected:
Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
std::unique_ptr<RasterizerInterface> rasterizer;
@@ -97,6 +112,9 @@ protected:
private:
/// Updates the framebuffer layout of the contained render window handle.
void UpdateCurrentFramebufferLayout();
// Value of brightness before backlight switch used to preserve value.
f32 current_brightness_backup;
};
} // namespace VideoCore

View File

@@ -8,13 +8,17 @@
#include "common/assert.h"
#include "common/microprofile.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size)
@@ -26,11 +30,22 @@ CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t siz
CachedBufferBlock::~CachedBufferBlock() = default;
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
std::size_t stream_size)
: VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>{
rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {}
const Device& device, std::size_t stream_size)
: GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {
if (!device.HasFastBufferSubData()) {
return;
}
OGLBufferCache::~OGLBufferCache() = default;
static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
for (const GLuint cbuf : cbufs) {
glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
}
}
OGLBufferCache::~OGLBufferCache() {
glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
}
Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
return std::make_shared<CachedBufferBlock>(cache_addr, size);
@@ -69,4 +84,12 @@ void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t
static_cast<GLsizeiptr>(size));
}
OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
std::size_t size) {
DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
const GLuint& cbuf = cbufs[cbuf_cursor++];
glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
return {&cbuf, 0};
}
} // namespace OpenGL

View File

@@ -4,10 +4,12 @@
#pragma once
#include <array>
#include <memory>
#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
@@ -18,12 +20,14 @@ class System;
namespace OpenGL {
class Device;
class OGLStreamBuffer;
class RasterizerOpenGL;
class CachedBufferBlock;
using Buffer = std::shared_ptr<CachedBufferBlock>;
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
class CachedBufferBlock : public VideoCommon::BufferBlock {
public:
@@ -38,14 +42,18 @@ private:
OGLBuffer gl_buffer{};
};
class OGLBufferCache final : public VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer> {
class OGLBufferCache final : public GenericBufferCache {
public:
explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
std::size_t stream_size);
const Device& device, std::size_t stream_size);
~OGLBufferCache();
const GLuint* GetEmptyBuffer(std::size_t) override;
void Acquire() noexcept {
cbuf_cursor = 0;
}
protected:
Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
@@ -61,6 +69,14 @@ protected:
void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) override;
BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
private:
std::size_t cbuf_cursor = 0;
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram>
cbufs;
};
} // namespace OpenGL

View File

@@ -51,8 +51,11 @@ bool HasExtension(const std::vector<std::string_view>& images, std::string_view
} // Anonymous namespace
Device::Device() {
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
const std::vector extensions = GetExtensions();
const bool is_nvidia = vendor == "NVIDIA Corporation";
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
@@ -64,6 +67,7 @@ Device::Device() {
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = TestComponentIndexingBug();
has_precise_bug = TestPreciseBug();
has_fast_buffer_sub_data = is_nvidia;
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);

View File

@@ -54,6 +54,10 @@ public:
return has_precise_bug;
}
bool HasFastBufferSubData() const {
return has_fast_buffer_sub_data;
}
private:
static bool TestVariableAoffi();
static bool TestComponentIndexingBug();
@@ -69,6 +73,7 @@ private:
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
bool has_precise_bug{};
bool has_fast_buffer_sub_data{};
};
} // namespace OpenGL

View File

@@ -67,9 +67,7 @@ static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buf
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
ScreenInfo& info)
: texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} {
OpenGLState::ApplyDefaultState();
system{system}, screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
state.draw.shader_program = 0;
state.Apply();
@@ -259,10 +257,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
continue;
}
const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
GLShader::MaxwellUniformData ubo{};
ubo.SetFromRegs(gpu, stage);
ubo.SetFromRegs(gpu);
const auto [buffer, offset] =
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
@@ -271,10 +267,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
Shader shader{shader_cache.GetStageProgram(program)};
const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
SetupDrawConstBuffers(stage_enum, shader);
SetupDrawGlobalMemory(stage_enum, shader);
const auto texture_buffer_usage{SetupDrawTextures(stage_enum, shader, base_bindings)};
// Stage indices are 0 - 5
const auto stage = static_cast<Maxwell::ShaderStage>(index == 0 ? 0 : index - 1);
SetupDrawConstBuffers(stage, shader);
SetupDrawGlobalMemory(stage, shader);
const auto texture_buffer_usage{SetupDrawTextures(stage, shader, base_bindings)};
const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant);
@@ -342,42 +339,6 @@ std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
}
template <typename Map, typename Interval>
static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
return boost::make_iterator_range(map.equal_range(interval));
}
void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
std::lock_guard lock{pages_mutex};
const u64 page_start{addr >> Memory::PAGE_BITS};
const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
// Interval maps will erase segments if count reaches 0, so if delta is negative we have to
// subtract after iterating
const auto pages_interval = CachedPageMap::interval_type::right_open(page_start, page_end);
if (delta > 0)
cached_pages.add({pages_interval, delta});
for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) {
const auto interval = pair.first & pages_interval;
const int count = pair.second;
const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS;
const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS;
const u64 interval_size = interval_end_addr - interval_start_addr;
if (delta > 0 && count == delta)
Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true);
else if (delta < 0 && count == -delta)
Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false);
else
ASSERT(count >= 0);
}
if (delta < 0)
cached_pages.add({pages_interval, delta});
}
void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
shader_cache.LoadDiskCache(stop_loading, callback);
@@ -596,6 +557,8 @@ void RasterizerOpenGL::DrawPrelude() {
SyncPolygonOffset();
SyncAlphaTest();
buffer_cache.Acquire();
// Draw the vertex batch
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
@@ -917,7 +880,8 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
const auto alignment = device.GetUniformBufferAlignment();
const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment);
const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
device.HasFastBufferSubData());
bind_ubo_pushbuffer.Push(cbuf, offset, size);
}
@@ -969,14 +933,15 @@ TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stag
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
const auto texture = [&]() {
const auto texture = [&] {
if (!entry.IsBindless()) {
return maxwell3d.GetStageTexture(stage, entry.GetOffset());
}
const auto cbuf = entry.GetBindlessCBuf();
Tegra::Texture::TextureHandle tex_handle;
tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second);
return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset());
Tegra::Engines::ShaderType shader_type = static_cast<Tegra::Engines::ShaderType>(stage);
tex_handle.raw = maxwell3d.AccessConstBuffer32(shader_type, cbuf.first, cbuf.second);
return maxwell3d.GetTextureInfo(tex_handle);
}();
if (SetupTexture(base_bindings.sampler + bindpoint, texture, entry)) {
@@ -999,14 +964,15 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel)
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
const auto texture = [&]() {
const auto texture = [&] {
if (!entry.IsBindless()) {
return compute.GetTexture(entry.GetOffset());
}
const auto cbuf = entry.GetBindlessCBuf();
Tegra::Texture::TextureHandle tex_handle;
tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second);
return compute.GetTextureInfo(tex_handle, entry.GetOffset());
tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute,
cbuf.first, cbuf.second);
return compute.GetTextureInfo(tex_handle);
}();
if (SetupTexture(bindpoint, texture, entry)) {
@@ -1044,14 +1010,15 @@ void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
const auto& entries = shader->GetShaderEntries().images;
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
const auto tic = [&]() {
const auto tic = [&] {
if (!entry.IsBindless()) {
return compute.GetTexture(entry.GetOffset()).tic;
}
const auto cbuf = entry.GetBindlessCBuf();
Tegra::Texture::TextureHandle tex_handle;
tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second);
return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic;
tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute,
cbuf.first, cbuf.second);
return compute.GetTextureInfo(tex_handle).tic;
}();
SetupImage(bindpoint, tic, entry);
}
@@ -1092,6 +1059,15 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
}
state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0;
state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0;
bool flip_y = false;
if (regs.viewport_transform[0].scale_y < 0.0) {
flip_y = !flip_y;
}
if (regs.screen_y_control.y_negate != 0) {
flip_y = !flip_y;
}
state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
}
void RasterizerOpenGL::SyncClipEnabled(
@@ -1114,26 +1090,24 @@ void RasterizerOpenGL::SyncClipCoef() {
}
void RasterizerOpenGL::SyncCullMode() {
auto& maxwell3d = system.GPU().Maxwell3D();
const auto& regs = maxwell3d.regs;
const auto& regs = system.GPU().Maxwell3D().regs;
state.cull.enabled = regs.cull.enabled != 0;
if (state.cull.enabled) {
state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
}
const bool flip_triangles{regs.screen_y_control.triangle_rast_flip == 0 ||
regs.viewport_transform[0].scale_y < 0.0f};
state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
// If the GPU is configured to flip the rasterized triangles, then we need to flip the
// notion of front and back. Note: We flip the triangles when the value of the register is 0
// because OpenGL already does it for us.
if (flip_triangles) {
if (state.cull.front_face == GL_CCW)
state.cull.front_face = GL_CW;
else if (state.cull.front_face == GL_CW)
state.cull.front_face = GL_CCW;
// If the GPU is configured to flip the rasterized triangles, then we need to flip the
// notion of front and back.
const bool flip_triangles{regs.screen_y_control.triangle_rast_flip != 0 &&
regs.viewport_transform[0].scale_y > 0.0f};
if (flip_triangles) {
if (state.cull.front_face == GL_CCW) {
state.cull.front_face = GL_CW;
} else if (state.cull.front_face == GL_CW) {
state.cull.front_face = GL_CCW;
}
}
}

View File

@@ -9,17 +9,16 @@
#include <cstddef>
#include <map>
#include <memory>
#include <mutex>
#include <optional>
#include <tuple>
#include <utility>
#include <boost/icl/interval_map.hpp>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/engines/const_buffer_info.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
@@ -52,7 +51,7 @@ namespace OpenGL {
struct ScreenInfo;
struct DrawParameters;
class RasterizerOpenGL : public VideoCore::RasterizerInterface {
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
ScreenInfo& info);
@@ -73,7 +72,6 @@ public:
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
void LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
@@ -228,11 +226,6 @@ private:
AccelDraw accelerate_draw = AccelDraw::Disabled;
OGLFramebuffer clear_framebuffer;
using CachedPageMap = boost::icl::interval_map<u64, int>;
CachedPageMap cached_pages;
std::mutex pages_mutex;
};
} // namespace OpenGL

View File

@@ -3,13 +3,16 @@
// Refer to the license.txt file included.
#include <mutex>
#include <optional>
#include <string>
#include <thread>
#include <unordered_set>
#include <boost/functional/hash.hpp>
#include "common/assert.h"
#include "common/hash.h"
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -21,18 +24,20 @@
namespace OpenGL {
using Tegra::Engines::ShaderType;
using VideoCommon::Shader::ConstBufferLocker;
using VideoCommon::Shader::ProgramCode;
using VideoCommon::Shader::ShaderIR;
namespace {
// One UBO is always reserved for emulation values on staged shaders
constexpr u32 STAGE_RESERVED_UBOS = 1;
struct UnspecializedShader {
std::string code;
GLShader::ShaderEntries entries;
ProgramType program_type;
};
constexpr u32 STAGE_MAIN_OFFSET = 10;
constexpr u32 KERNEL_MAIN_OFFSET = 0;
namespace {
constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
/// Gets the address for the specified shader stage program
GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
@@ -41,6 +46,39 @@ GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program)
return gpu.regs.code_address.CodeAddress() + shader_config.offset;
}
/// Gets if the current instruction offset is a scheduler instruction
constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
// Sched instructions appear once every 4 instructions.
constexpr std::size_t SchedPeriod = 4;
const std::size_t absolute_offset = offset - main_offset;
return (absolute_offset % SchedPeriod) == 0;
}
/// Calculates the size of a program stream
std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
constexpr std::size_t start_offset = 10;
// This is the encoded version of BRA that jumps to itself. All Nvidia
// shaders end with one.
constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
std::size_t offset = start_offset;
while (offset < program.size()) {
const u64 instruction = program[offset];
if (!IsSchedInstruction(offset, start_offset)) {
if ((instruction & mask) == self_jumping_branch) {
// End on Maxwell's "nop" instruction
break;
}
if (instruction == 0) {
break;
}
}
offset++;
}
// The last instruction is included in the program size
return std::min(offset + 1, program.size());
}
/// Gets the shader program code from memory for the specified address
ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
const u8* host_ptr) {
@@ -51,6 +89,7 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g
});
memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
program_code.size() * sizeof(u64));
program_code.resize(CalculateProgramSize(program_code));
return program_code;
}
@@ -71,14 +110,6 @@ constexpr GLenum GetShaderType(ProgramType program_type) {
}
}
/// Gets if the current instruction offset is a scheduler instruction
constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
// Sched instructions appear once every 4 instructions.
constexpr std::size_t SchedPeriod = 4;
const std::size_t absolute_offset = offset - main_offset;
return (absolute_offset % SchedPeriod) == 0;
}
/// Describes primitive behavior on geometry shaders
constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
switch (primitive_mode) {
@@ -121,110 +152,142 @@ ProgramType GetProgramType(Maxwell::ShaderProgram program) {
return {};
}
/// Calculates the size of a program stream
std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
constexpr std::size_t start_offset = 10;
// This is the encoded version of BRA that jumps to itself. All Nvidia
// shaders end with one.
constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
std::size_t offset = start_offset;
std::size_t size = start_offset * sizeof(u64);
while (offset < program.size()) {
const u64 instruction = program[offset];
if (!IsSchedInstruction(offset, start_offset)) {
if ((instruction & mask) == self_jumping_branch) {
// End on Maxwell's "nop" instruction
break;
}
if (instruction == 0) {
break;
}
}
size += sizeof(u64);
offset++;
}
// The last instruction is included in the program size
return std::min(size + sizeof(u64), program.size() * sizeof(u64));
}
/// Hashes one (or two) program streams
u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,
const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) {
if (size_a == 0) {
size_a = CalculateProgramSize(code);
const ProgramCode& code_b) {
u64 unique_identifier = boost::hash_value(code);
if (program_type == ProgramType::VertexA) {
// VertexA programs include two programs
boost::hash_combine(unique_identifier, boost::hash_value(code_b));
}
u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a);
if (program_type != ProgramType::VertexA) {
return unique_identifier;
}
// VertexA programs include two programs
std::size_t seed = 0;
boost::hash_combine(seed, unique_identifier);
if (size_b == 0) {
size_b = CalculateProgramSize(code_b);
}
const u64 identifier_b =
Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b);
boost::hash_combine(seed, identifier_b);
return static_cast<u64>(seed);
return unique_identifier;
}
/// Creates an unspecialized program from code streams
GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type,
ProgramCode program_code, ProgramCode program_code_b) {
GLShader::ShaderSetup setup(program_code);
setup.program.size_a = CalculateProgramSize(program_code);
setup.program.size_b = 0;
if (program_type == ProgramType::VertexA) {
// VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
// Conventional HW does not support this, so we combine VertexA and VertexB into one
// stage here.
setup.SetProgramB(program_code_b);
setup.program.size_b = CalculateProgramSize(program_code_b);
}
setup.program.unique_identifier = GetUniqueIdentifier(
program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b);
std::string GenerateGLSL(const Device& device, ProgramType program_type, const ShaderIR& ir,
const std::optional<ShaderIR>& ir_b) {
switch (program_type) {
case ProgramType::VertexA:
case ProgramType::VertexB:
return GLShader::GenerateVertexShader(device, setup);
return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr);
case ProgramType::Geometry:
return GLShader::GenerateGeometryShader(device, setup);
return GLShader::GenerateGeometryShader(device, ir);
case ProgramType::Fragment:
return GLShader::GenerateFragmentShader(device, setup);
return GLShader::GenerateFragmentShader(device, ir);
case ProgramType::Compute:
return GLShader::GenerateComputeShader(device, setup);
return GLShader::GenerateComputeShader(device, ir);
default:
UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type));
return {};
}
}
CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
ProgramType program_type, const ProgramVariant& variant,
bool hint_retrievable = false) {
constexpr const char* GetProgramTypeName(ProgramType program_type) {
switch (program_type) {
case ProgramType::VertexA:
case ProgramType::VertexB:
return "VS";
case ProgramType::TessellationControl:
return "TCS";
case ProgramType::TessellationEval:
return "TES";
case ProgramType::Geometry:
return "GS";
case ProgramType::Fragment:
return "FS";
case ProgramType::Compute:
return "CS";
}
return "UNK";
}
Tegra::Engines::ShaderType GetEnginesShaderType(ProgramType program_type) {
switch (program_type) {
case ProgramType::VertexA:
case ProgramType::VertexB:
return Tegra::Engines::ShaderType::Vertex;
case ProgramType::TessellationControl:
return Tegra::Engines::ShaderType::TesselationControl;
case ProgramType::TessellationEval:
return Tegra::Engines::ShaderType::TesselationEval;
case ProgramType::Geometry:
return Tegra::Engines::ShaderType::Geometry;
case ProgramType::Fragment:
return Tegra::Engines::ShaderType::Fragment;
case ProgramType::Compute:
return Tegra::Engines::ShaderType::Compute;
}
UNREACHABLE();
return {};
}
std::string GetShaderId(u64 unique_identifier, ProgramType program_type) {
return fmt::format("{}{:016X}", GetProgramTypeName(program_type), unique_identifier);
}
Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(
Core::System& system, ProgramType program_type) {
if (program_type == ProgramType::Compute) {
return system.GPU().KeplerCompute();
} else {
return system.GPU().Maxwell3D();
}
}
std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ProgramType program_type) {
return std::make_unique<ConstBufferLocker>(GetEnginesShaderType(program_type),
GetConstBufferEngineInterface(system, program_type));
}
void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
for (const auto& key : usage.keys) {
const auto [buffer, offset] = key.first;
locker.InsertKey(buffer, offset, key.second);
}
for (const auto& [offset, sampler] : usage.bound_samplers) {
locker.InsertBoundSampler(offset, sampler);
}
for (const auto& [key, sampler] : usage.bindless_samplers) {
const auto [buffer, offset] = key;
locker.InsertBindlessSampler(buffer, offset, sampler);
}
}
CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type,
const ProgramCode& program_code, const ProgramCode& program_code_b,
const ProgramVariant& variant, ConstBufferLocker& locker,
bool hint_retrievable = false) {
LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, program_type));
const bool is_compute = program_type == ProgramType::Compute;
const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
const ShaderIR ir(program_code, main_offset, COMPILER_SETTINGS, locker);
std::optional<ShaderIR> ir_b;
if (!program_code_b.empty()) {
ir_b.emplace(program_code_b, main_offset, COMPILER_SETTINGS, locker);
}
const auto entries = GLShader::GetEntries(ir);
auto base_bindings{variant.base_bindings};
const auto primitive_mode{variant.primitive_mode};
const auto texture_buffer_usage{variant.texture_buffer_usage};
std::string source = R"(#version 430 core
std::string source = fmt::format(R"(// {}
#version 430 core
#extension GL_ARB_separate_shader_objects : enable
#extension GL_ARB_shader_viewport_layer_array : enable
#extension GL_EXT_shader_image_load_formatted : enable
#extension GL_NV_gpu_shader5 : enable
#extension GL_NV_shader_thread_group : enable
#extension GL_NV_shader_thread_shuffle : enable
)";
if (program_type == ProgramType::Compute) {
)",
GetShaderId(unique_identifier, program_type));
if (is_compute) {
source += "#extension GL_ARB_compute_variable_group_size : require\n";
}
source += '\n';
if (program_type != ProgramType::Compute) {
if (!is_compute) {
source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
}
@@ -268,7 +331,7 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
}
source += '\n';
source += code;
source += GenerateGLSL(device, program_type, ir, ir_b);
OGLShader shader;
shader.Create(source.c_str(), GetShaderType(program_type));
@@ -278,85 +341,97 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
return program;
}
std::set<GLenum> GetSupportedFormats() {
std::set<GLenum> supported_formats;
std::unordered_set<GLenum> GetSupportedFormats() {
GLint num_formats{};
glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
std::vector<GLint> formats(num_formats);
glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
for (const GLint format : formats)
std::unordered_set<GLenum> supported_formats;
for (const GLint format : formats) {
supported_formats.insert(static_cast<GLenum>(format));
}
return supported_formats;
}
} // Anonymous namespace
CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type,
GLShader::ProgramResult result)
: RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr},
unique_identifier{params.unique_identifier}, program_type{program_type},
disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs},
entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {}
GLShader::ShaderEntries entries, ProgramCode program_code,
ProgramCode program_code_b)
: RasterizerCacheObject{params.host_ptr}, system{params.system},
disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr},
unique_identifier{params.unique_identifier}, program_type{program_type}, entries{entries},
program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {
if (!params.precompiled_variants) {
return;
}
for (const auto& pair : *params.precompiled_variants) {
auto locker = MakeLocker(system, program_type);
const auto& usage = pair->first;
FillLocker(*locker, usage);
std::unique_ptr<LockerVariant>* locker_variant = nullptr;
const auto it =
std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) {
return variant->locker->HasEqualKeys(*locker);
});
if (it == locker_variants.end()) {
locker_variant = &locker_variants.emplace_back();
*locker_variant = std::make_unique<LockerVariant>();
locker_variant->get()->locker = std::move(locker);
} else {
locker_variant = &*it;
}
locker_variant->get()->programs.emplace(usage.variant, pair->second);
}
}
Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
Maxwell::ShaderProgram program_type,
ProgramCode&& program_code,
ProgramCode&& program_code_b) {
const auto code_size{CalculateProgramSize(program_code)};
const auto code_size_b{CalculateProgramSize(program_code_b)};
auto result{
CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)};
if (result.first.empty()) {
// TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
return {};
}
ProgramCode program_code, ProgramCode program_code_b) {
params.disk_cache.SaveRaw(ShaderDiskCacheRaw(
params.unique_identifier, GetProgramType(program_type),
static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)),
std::move(program_code), std::move(program_code_b)));
params.unique_identifier, GetProgramType(program_type), program_code, program_code_b));
ConstBufferLocker locker(GetEnginesShaderType(GetProgramType(program_type)));
const ShaderIR ir(program_code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker);
// TODO(Rodrigo): Handle VertexA shaders
// std::optional<ShaderIR> ir_b;
// if (!program_code_b.empty()) {
// ir_b.emplace(program_code_b, STAGE_MAIN_OFFSET);
// }
return std::shared_ptr<CachedShader>(
new CachedShader(params, GetProgramType(program_type), std::move(result)));
new CachedShader(params, GetProgramType(program_type), GLShader::GetEntries(ir),
std::move(program_code), std::move(program_code_b)));
}
Shader CachedShader::CreateStageFromCache(const ShaderParameters& params,
Maxwell::ShaderProgram program_type,
GLShader::ProgramResult result) {
return std::shared_ptr<CachedShader>(
new CachedShader(params, GetProgramType(program_type), std::move(result)));
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
params.disk_cache.SaveRaw(
ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, code));
ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute);
const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker);
return std::shared_ptr<CachedShader>(new CachedShader(
params, ProgramType::Compute, GLShader::GetEntries(ir), std::move(code), {}));
}
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) {
auto result{CreateProgram(params.device, ProgramType::Compute, code, {})};
const auto code_size{CalculateProgramSize(code)};
params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute,
static_cast<u32>(code_size / sizeof(u64)), 0,
std::move(code), {}));
return std::shared_ptr<CachedShader>(
new CachedShader(params, ProgramType::Compute, std::move(result)));
}
Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params,
GLShader::ProgramResult result) {
return std::shared_ptr<CachedShader>(
new CachedShader(params, ProgramType::Compute, std::move(result)));
Shader CachedShader::CreateFromCache(const ShaderParameters& params,
const UnspecializedShader& unspecialized) {
return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.program_type,
unspecialized.entries, unspecialized.code,
unspecialized.code_b));
}
std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
const auto [entry, is_cache_miss] = programs.try_emplace(variant);
UpdateVariant();
const auto [entry, is_cache_miss] = curr_variant->programs.try_emplace(variant);
auto& program = entry->second;
if (is_cache_miss) {
program = TryLoadProgram(variant);
if (!program) {
program = SpecializeShader(code, entries, program_type, variant);
disk_cache.SaveUsage(GetUsage(variant));
}
program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b,
variant, *curr_variant->locker);
disk_cache.SaveUsage(GetUsage(variant, *curr_variant->locker));
LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
}
@@ -372,18 +447,33 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar
return {program->handle, base_bindings};
}
CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const {
const auto found = precompiled_programs.find(GetUsage(variant));
if (found == precompiled_programs.end()) {
return {};
void CachedShader::UpdateVariant() {
if (curr_variant && !curr_variant->locker->IsConsistent()) {
curr_variant = nullptr;
}
if (!curr_variant) {
for (auto& variant : locker_variants) {
if (variant->locker->IsConsistent()) {
curr_variant = variant.get();
}
}
}
if (!curr_variant) {
auto& new_variant = locker_variants.emplace_back();
new_variant = std::make_unique<LockerVariant>();
new_variant->locker = MakeLocker(system, program_type);
curr_variant = new_variant.get();
}
return found->second;
}
ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const {
ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,
const ConstBufferLocker& locker) const {
ShaderDiskCacheUsage usage;
usage.unique_identifier = unique_identifier;
usage.variant = variant;
usage.keys = locker.GetKeys();
usage.bound_samplers = locker.GetBoundSamplers();
usage.bindless_samplers = locker.GetBindlessSamplers();
return usage;
}
@@ -399,18 +489,15 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
return;
}
const auto [raws, shader_usages] = *transferable;
auto [decompiled, dumps] = disk_cache.LoadPrecompiled();
const auto supported_formats{GetSupportedFormats()};
const auto unspecialized_shaders{
GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)};
if (stop_loading) {
if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) {
return;
}
// Track if precompiled cache was altered during loading to know if we have to serialize the
// virtual precompiled cache file back to the hard drive
const auto dumps = disk_cache.LoadPrecompiled();
const auto supported_formats = GetSupportedFormats();
// Track if precompiled cache was altered during loading to know if we have to
// serialize the virtual precompiled cache file back to the hard drive
bool precompiled_cache_altered = false;
// Inform the frontend about shader build initialization
@@ -433,9 +520,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
return;
}
const auto& usage{shader_usages[i]};
LOG_INFO(Render_OpenGL, "Building shader {:016x} (index {} of {})",
usage.unique_identifier, i, shader_usages.size());
const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)};
const auto dump{dumps.find(usage)};
@@ -449,21 +533,28 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
}
}
if (!shader) {
shader = SpecializeShader(unspecialized.code, unspecialized.entries,
unspecialized.program_type, usage.variant, true);
auto locker{MakeLocker(system, unspecialized.program_type)};
FillLocker(*locker, usage);
shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type,
unspecialized.code, unspecialized.code_b, usage.variant,
*locker, true);
}
std::scoped_lock lock(mutex);
std::scoped_lock lock{mutex};
if (callback) {
callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
shader_usages.size());
}
precompiled_programs.emplace(usage, std::move(shader));
// TODO(Rodrigo): Is there a better way to do this?
precompiled_variants[usage.unique_identifier].push_back(
precompiled_programs.find(usage));
}
};
const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1)};
const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)};
const std::size_t bucket_size{shader_usages.size() / num_workers};
std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
std::vector<std::thread> threads(num_workers);
@@ -483,7 +574,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
if (compilation_failed) {
// Invalidate the precompiled cache if a shader dumped shader was rejected
disk_cache.InvalidatePrecompiled();
dumps.clear();
precompiled_cache_altered = true;
return;
}
@@ -491,8 +581,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
return;
}
// TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before
// precompiling them
// TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
// before precompiling them
for (std::size_t i = 0; i < shader_usages.size(); ++i) {
const auto& usage{shader_usages[i]};
@@ -508,9 +598,13 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
}
}
CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats) {
const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const {
const auto it = precompiled_variants.find(unique_identifier);
return it == precompiled_variants.end() ? nullptr : &it->second;
}
CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) {
if (supported_formats.find(dump.binary_format) == supported_formats.end()) {
LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
return {};
@@ -532,56 +626,52 @@ CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
return shader;
}
std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecializedShaders(
bool ShaderCacheOpenGL::GenerateUnspecializedShaders(
const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
const std::vector<ShaderDiskCacheRaw>& raws,
const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) {
std::unordered_map<u64, UnspecializedShader> unspecialized;
const std::vector<ShaderDiskCacheRaw>& raws) {
if (callback) {
callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
}
for (std::size_t i = 0; i < raws.size(); ++i) {
if (stop_loading) {
return {};
return false;
}
const auto& raw{raws[i]};
const u64 unique_identifier{raw.GetUniqueIdentifier()};
const u64 calculated_hash{
GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())};
if (unique_identifier != calculated_hash) {
LOG_ERROR(
Render_OpenGL,
"Invalid hash in entry={:016x} (obtained hash={:016x}) - removing shader cache",
raw.GetUniqueIdentifier(), calculated_hash);
LOG_ERROR(Render_OpenGL,
"Invalid hash in entry={:016x} (obtained hash={:016x}) - "
"removing shader cache",
raw.GetUniqueIdentifier(), calculated_hash);
disk_cache.InvalidateTransferable();
return {};
return false;
}
GLShader::ProgramResult result;
if (const auto it = decompiled.find(unique_identifier); it != decompiled.end()) {
// If it's stored in the precompiled file, avoid decompiling it here
const auto& stored_decompiled{it->second};
result = {stored_decompiled.code, stored_decompiled.entries};
} else {
// Otherwise decompile the shader at boot and save the result to the decompiled file
result = CreateProgram(device, raw.GetProgramType(), raw.GetProgramCode(),
raw.GetProgramCodeB());
disk_cache.SaveDecompiled(unique_identifier, result.first, result.second);
}
const u32 main_offset =
raw.GetProgramType() == ProgramType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
ConstBufferLocker locker(GetEnginesShaderType(raw.GetProgramType()));
const ShaderIR ir(raw.GetProgramCode(), main_offset, COMPILER_SETTINGS, locker);
// TODO(Rodrigo): Handle VertexA shaders
// std::optional<ShaderIR> ir_b;
// if (raw.HasProgramA()) {
// ir_b.emplace(raw.GetProgramCodeB(), main_offset);
// }
precompiled_shaders.insert({unique_identifier, result});
unspecialized.insert(
{raw.GetUniqueIdentifier(),
{std::move(result.first), std::move(result.second), raw.GetProgramType()}});
UnspecializedShader unspecialized;
unspecialized.entries = GLShader::GetEntries(ir);
unspecialized.program_type = raw.GetProgramType();
unspecialized.code = raw.GetProgramCode();
unspecialized.code_b = raw.GetProgramCodeB();
unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized);
if (callback) {
callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size());
}
}
return unspecialized;
return true;
}
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
@@ -590,37 +680,35 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
}
auto& memory_manager{system.GPU().MemoryManager()};
const GPUVAddr program_addr{GetShaderAddress(system, program)};
const GPUVAddr address{GetShaderAddress(system, program)};
// Look up shader in the cache based on address
const auto host_ptr{memory_manager.GetPointer(program_addr)};
const auto host_ptr{memory_manager.GetPointer(address)};
Shader shader{TryGet(host_ptr)};
if (shader) {
return last_shaders[static_cast<std::size_t>(program)] = shader;
}
// No shader found - create a new one
ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};
ProgramCode program_code_b;
const bool is_program_a{program == Maxwell::ShaderProgram::VertexA};
if (is_program_a) {
const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
program_code_b = GetShaderCode(memory_manager, program_addr_b,
memory_manager.GetPointer(program_addr_b));
ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};
ProgramCode code_b;
if (program == Maxwell::ShaderProgram::VertexA) {
const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b));
}
const auto unique_identifier =
GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b);
const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
host_ptr, unique_identifier};
const auto unique_identifier = GetUniqueIdentifier(GetProgramType(program), code, code_b);
const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
const ShaderParameters params{system, disk_cache, precompiled_variants, device,
cpu_addr, host_ptr, unique_identifier};
const auto found = precompiled_shaders.find(unique_identifier);
if (found == precompiled_shaders.end()) {
shader = CachedShader::CreateStageFromMemory(params, program, std::move(program_code),
std::move(program_code_b));
const auto found = unspecialized_shaders.find(unique_identifier);
if (found == unspecialized_shaders.end()) {
shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
std::move(code_b));
} else {
shader = CachedShader::CreateStageFromCache(params, program, found->second);
shader = CachedShader::CreateFromCache(params, found->second);
}
Register(shader);
@@ -638,15 +726,16 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
// No kernel found - create a new one
auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})};
const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
host_ptr, unique_identifier};
const ShaderParameters params{system, disk_cache, precompiled_variants, device,
cpu_addr, host_ptr, unique_identifier};
const auto found = precompiled_shaders.find(unique_identifier);
if (found == precompiled_shaders.end()) {
const auto found = unspecialized_shaders.find(unique_identifier);
if (found == unspecialized_shaders.end()) {
kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
} else {
kernel = CachedShader::CreateKernelFromCache(params, found->second);
kernel = CachedShader::CreateFromCache(params, found->second);
}
Register(kernel);

View File

@@ -8,9 +8,10 @@
#include <atomic>
#include <bitset>
#include <memory>
#include <set>
#include <string>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <glad/glad.h>
@@ -20,6 +21,8 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/shader/const_buffer_locker.h"
#include "video_core/shader/shader_ir.h"
namespace Core {
class System;
@@ -40,11 +43,19 @@ using Shader = std::shared_ptr<CachedShader>;
using CachedProgram = std::shared_ptr<OGLProgram>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>;
using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>;
struct UnspecializedShader {
GLShader::ShaderEntries entries;
ProgramType program_type;
ProgramCode code;
ProgramCode code_b;
};
struct ShaderParameters {
Core::System& system;
ShaderDiskCacheOpenGL& disk_cache;
const PrecompiledPrograms& precompiled_programs;
const PrecompiledVariants* precompiled_variants;
const Device& device;
VAddr cpu_addr;
u8* host_ptr;
@@ -55,23 +66,18 @@ class CachedShader final : public RasterizerCacheObject {
public:
static Shader CreateStageFromMemory(const ShaderParameters& params,
Maxwell::ShaderProgram program_type,
ProgramCode&& program_code, ProgramCode&& program_code_b);
ProgramCode program_code, ProgramCode program_code_b);
static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
static Shader CreateStageFromCache(const ShaderParameters& params,
Maxwell::ShaderProgram program_type,
GLShader::ProgramResult result);
static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code);
static Shader CreateKernelFromCache(const ShaderParameters& params,
GLShader::ProgramResult result);
static Shader CreateFromCache(const ShaderParameters& params,
const UnspecializedShader& unspecialized);
VAddr GetCpuAddr() const override {
return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
return shader_length;
return program_code.size() * sizeof(u64);
}
/// Gets the shader entries for the shader
@@ -83,24 +89,36 @@ public:
std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);
private:
struct LockerVariant {
std::unique_ptr<VideoCommon::Shader::ConstBufferLocker> locker;
std::unordered_map<ProgramVariant, CachedProgram> programs;
};
explicit CachedShader(const ShaderParameters& params, ProgramType program_type,
GLShader::ProgramResult result);
GLShader::ShaderEntries entries, ProgramCode program_code,
ProgramCode program_code_b);
CachedProgram TryLoadProgram(const ProgramVariant& variant) const;
void UpdateVariant();
ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const;
ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant,
const VideoCommon::Shader::ConstBufferLocker& locker) const;
Core::System& system;
ShaderDiskCacheOpenGL& disk_cache;
const Device& device;
VAddr cpu_addr{};
u64 unique_identifier{};
ProgramType program_type{};
ShaderDiskCacheOpenGL& disk_cache;
const PrecompiledPrograms& precompiled_programs;
GLShader::ShaderEntries entries;
std::string code;
std::size_t shader_length{};
std::unordered_map<ProgramVariant, CachedProgram> programs;
ProgramCode program_code;
ProgramCode program_code_b;
LockerVariant* curr_variant = nullptr;
std::vector<std::unique_ptr<LockerVariant>> locker_variants;
};
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
@@ -123,21 +141,26 @@ protected:
void FlushObjectInner(const Shader& object) override {}
private:
std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders(
const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
const std::vector<ShaderDiskCacheRaw>& raws,
const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled);
bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback,
const std::vector<ShaderDiskCacheRaw>& raws);
CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
const std::set<GLenum>& supported_formats);
const std::unordered_set<GLenum>& supported_formats);
const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const;
Core::System& system;
Core::Frontend::EmuWindow& emu_window;
const Device& device;
ShaderDiskCacheOpenGL disk_cache;
PrecompiledShaders precompiled_shaders;
PrecompiledPrograms precompiled_programs;
std::unordered_map<u64, PrecompiledVariants> precompiled_variants;
std::unordered_map<u64, UnspecializedShader> unspecialized_shaders;
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
};

View File

@@ -415,27 +415,6 @@ public:
return code.GetResult();
}
ShaderEntries GetShaderEntries() const {
ShaderEntries entries;
for (const auto& cbuf : ir.GetConstantBuffers()) {
entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
cbuf.first);
}
for (const auto& sampler : ir.GetSamplers()) {
entries.samplers.emplace_back(sampler);
}
for (const auto& [offset, image] : ir.GetImages()) {
entries.images.emplace_back(image);
}
for (const auto& [base, usage] : ir.GetGlobalMemory()) {
entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset,
usage.is_read, usage.is_written);
}
entries.clip_distances = ir.GetClipDistances();
entries.shader_length = ir.GetLength();
return entries;
}
private:
friend class ASTDecompiler;
friend class ExprDecompiler;
@@ -1893,10 +1872,6 @@ private:
Expression EmitVertex(Operation operation) {
ASSERT_MSG(stage == ProgramType::Geometry,
"EmitVertex is expected to be used in a geometry shader.");
// If a geometry shader is attached, it will always flip (it's the last stage before
// fragment). For more info about flipping, refer to gl_shader_gen.cpp.
code.AddLine("gl_Position.xy *= viewport_flip.xy;");
code.AddLine("EmitVertex();");
return {};
}
@@ -1904,14 +1879,12 @@ private:
Expression EndPrimitive(Operation operation) {
ASSERT_MSG(stage == ProgramType::Geometry,
"EndPrimitive is expected to be used in a geometry shader.");
code.AddLine("EndPrimitive();");
return {};
}
Expression YNegate(Operation operation) {
// Config pack's third value is Y_NEGATE's state.
return {"config_pack[2]", Type::Uint};
return {"y_negate", Type::Float};
}
template <u32 element>
@@ -2314,10 +2287,13 @@ public:
switch (index) {
case Tegra::Shader::Pred::NeverExecute:
target = "false";
break;
case Tegra::Shader::Pred::UnusedIndex:
target = "true";
break;
default:
target = decomp.GetPredicate(index);
break;
}
} else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) {
target = decomp.GetInternalFlag(flag->GetFlag());
@@ -2335,6 +2311,11 @@ public:
inner += expr.value ? "true" : "false";
}
void operator()(VideoCommon::Shader::ExprGprEqual& expr) {
inner +=
"( ftou(" + decomp.GetRegister(expr.gpr) + ") == " + std::to_string(expr.value) + ')';
}
const std::string& GetResult() const {
return inner;
}
@@ -2473,25 +2454,46 @@ void GLSLDecompiler::DecompileAST() {
} // Anonymous namespace
std::string GetCommonDeclarations() {
return fmt::format(
"#define ftoi floatBitsToInt\n"
"#define ftou floatBitsToUint\n"
"#define itof intBitsToFloat\n"
"#define utof uintBitsToFloat\n\n"
"bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n"
" bvec2 is_nan1 = isnan(pair1);\n"
" bvec2 is_nan2 = isnan(pair2);\n"
" return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || "
"is_nan2.y);\n"
"}}\n\n");
ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
ShaderEntries entries;
for (const auto& cbuf : ir.GetConstantBuffers()) {
entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
cbuf.first);
}
for (const auto& sampler : ir.GetSamplers()) {
entries.samplers.emplace_back(sampler);
}
for (const auto& [offset, image] : ir.GetImages()) {
entries.images.emplace_back(image);
}
for (const auto& [base, usage] : ir.GetGlobalMemory()) {
entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read,
usage.is_written);
}
entries.clip_distances = ir.GetClipDistances();
entries.shader_length = ir.GetLength();
return entries;
}
ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,
const std::string& suffix) {
std::string GetCommonDeclarations() {
return R"(#define ftoi floatBitsToInt
#define ftou floatBitsToUint
#define itof intBitsToFloat
#define utof uintBitsToFloat
bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {
bvec2 is_nan1 = isnan(pair1);
bvec2 is_nan2 = isnan(pair2);
return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
}
)";
}
std::string Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,
const std::string& suffix) {
GLSLDecompiler decompiler(device, ir, stage, suffix);
decompiler.Decompile();
return {decompiler.GetResult(), decompiler.GetShaderEntries()};
return decompiler.GetResult();
}
} // namespace OpenGL::GLShader

View File

@@ -34,10 +34,7 @@ enum class ProgramType : u32 {
namespace OpenGL::GLShader {
struct ShaderEntries;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using ProgramResult = std::pair<std::string, ShaderEntries>;
using SamplerEntry = VideoCommon::Shader::Sampler;
using ImageEntry = VideoCommon::Shader::Image;
@@ -93,9 +90,11 @@ struct ShaderEntries {
std::size_t shader_length{};
};
ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir);
std::string GetCommonDeclarations();
ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
ProgramType stage, const std::string& suffix);
std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
ProgramType stage, const std::string& suffix);
} // namespace OpenGL::GLShader

View File

@@ -22,6 +22,29 @@
namespace OpenGL {
using VideoCommon::Shader::BindlessSamplerMap;
using VideoCommon::Shader::BoundSamplerMap;
using VideoCommon::Shader::KeyMap;
namespace {
struct ConstBufferKey {
u32 cbuf;
u32 offset;
u32 value;
};
struct BoundSamplerKey {
u32 offset;
Tegra::Engines::SamplerDescriptor sampler;
};
struct BindlessSamplerKey {
u32 cbuf;
u32 offset;
Tegra::Engines::SamplerDescriptor sampler;
};
using ShaderCacheVersionHash = std::array<u8, 64>;
enum class TransferableEntryKind : u32 {
@@ -29,18 +52,10 @@ enum class TransferableEntryKind : u32 {
Usage,
};
enum class PrecompiledEntryKind : u32 {
Decompiled,
Dump,
};
constexpr u32 NativeVersion = 4;
constexpr u32 NativeVersion = 5;
// Making sure sizes doesn't change by accident
static_assert(sizeof(BaseBindings) == 16);
static_assert(sizeof(ShaderDiskCacheUsage) == 40);
namespace {
ShaderCacheVersionHash GetShaderCacheVersionHash() {
ShaderCacheVersionHash hash{};
@@ -49,13 +64,11 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
return hash;
}
} // namespace
} // Anonymous namespace
ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
u32 program_code_size, u32 program_code_size_b,
ProgramCode program_code, ProgramCode program_code_b)
: unique_identifier{unique_identifier}, program_type{program_type},
program_code_size{program_code_size}, program_code_size_b{program_code_size_b},
program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {}
ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default;
@@ -90,15 +103,16 @@ bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
if (file.WriteObject(unique_identifier) != 1 ||
file.WriteObject(static_cast<u32>(program_type)) != 1 ||
file.WriteObject(program_code_size) != 1 || file.WriteObject(program_code_size_b) != 1) {
file.WriteObject(static_cast<u32>(program_code.size())) != 1 ||
file.WriteObject(static_cast<u32>(program_code_b.size())) != 1) {
return false;
}
if (file.WriteArray(program_code.data(), program_code_size) != program_code_size)
if (file.WriteArray(program_code.data(), program_code.size()) != program_code.size())
return false;
if (HasProgramA() &&
file.WriteArray(program_code_b.data(), program_code_size_b) != program_code_size_b) {
file.WriteArray(program_code_b.data(), program_code_b.size()) != program_code_b.size()) {
return false;
}
return true;
@@ -127,13 +141,13 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
u32 version{};
if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
LOG_ERROR(Render_OpenGL,
"Failed to get transferable cache version for title id={} - skipping",
"Failed to get transferable cache version for title id={}, skipping",
GetTitleID());
return {};
}
if (version < NativeVersion) {
LOG_INFO(Render_OpenGL, "Transferable shader cache is old - removing");
LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing");
file.Close();
InvalidateTransferable();
is_usable = true;
@@ -141,17 +155,18 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
}
if (version > NativeVersion) {
LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
"of the emulator - skipping");
"of the emulator, skipping");
return {};
}
// Version is valid, load the shaders
constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping";
std::vector<ShaderDiskCacheRaw> raws;
std::vector<ShaderDiskCacheUsage> usages;
while (file.Tell() < file.GetSize()) {
TransferableEntryKind kind{};
if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
LOG_ERROR(Render_OpenGL, "Failed to read transferable file - skipping");
LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping");
return {};
}
@@ -159,7 +174,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
case TransferableEntryKind::Raw: {
ShaderDiskCacheRaw entry;
if (!entry.Load(file)) {
LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry - skipping");
LOG_ERROR(Render_OpenGL, error_loading);
return {};
}
transferable.insert({entry.GetUniqueIdentifier(), {}});
@@ -167,16 +182,45 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
break;
}
case TransferableEntryKind::Usage: {
ShaderDiskCacheUsage usage{};
if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage)) {
LOG_ERROR(Render_OpenGL, "Failed to load transferable usage entry - skipping");
ShaderDiskCacheUsage usage;
u32 num_keys{};
u32 num_bound_samplers{};
u32 num_bindless_samplers{};
if (file.ReadArray(&usage.unique_identifier, 1) != 1 ||
file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
file.ReadArray(&num_bound_samplers, 1) != 1 ||
file.ReadArray(&num_bindless_samplers, 1) != 1) {
LOG_ERROR(Render_OpenGL, error_loading);
return {};
}
std::vector<ConstBufferKey> keys(num_keys);
std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
if (file.ReadArray(keys.data(), keys.size()) != keys.size() ||
file.ReadArray(bound_samplers.data(), bound_samplers.size()) !=
bound_samplers.size() ||
file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) !=
bindless_samplers.size()) {
LOG_ERROR(Render_OpenGL, error_loading);
return {};
}
for (const auto& key : keys) {
usage.keys.insert({{key.cbuf, key.offset}, key.value});
}
for (const auto& key : bound_samplers) {
usage.bound_samplers.emplace(key.offset, key.sampler);
}
for (const auto& key : bindless_samplers) {
usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
}
usages.push_back(std::move(usage));
break;
}
default:
LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={} - skipping",
LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping",
static_cast<u32>(kind));
return {};
}
@@ -186,13 +230,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
return {{std::move(raws), std::move(usages)}};
}
std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>
ShaderDiskCacheOpenGL::LoadPrecompiled() {
if (!is_usable) {
return {};
}
FileUtil::IOFile file(GetPrecompiledPath(), "rb");
std::string path = GetPrecompiledPath();
FileUtil::IOFile file(path, "rb");
if (!file.IsOpen()) {
LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}",
GetTitleID());
@@ -202,7 +247,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() {
const auto result = LoadPrecompiledFile(file);
if (!result) {
LOG_INFO(Render_OpenGL,
"Failed to load precompiled cache for game with title id={} - removing",
"Failed to load precompiled cache for game with title id={}, removing",
GetTitleID());
file.Close();
InvalidatePrecompiled();
@@ -211,7 +256,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() {
return *result;
}
std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>>
std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
// Read compressed file from disk and decompress to virtual precompiled cache file
std::vector<u8> compressed(file.GetSize());
@@ -231,238 +276,56 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
return {};
}
std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
ShaderDumpsMap dumps;
while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
PrecompiledEntryKind kind{};
if (!LoadObjectFromPrecompiled(kind)) {
u32 num_keys{};
u32 num_bound_samplers{};
u32 num_bindless_samplers{};
ShaderDiskCacheUsage usage;
if (!LoadObjectFromPrecompiled(usage.unique_identifier) ||
!LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) ||
!LoadObjectFromPrecompiled(num_bound_samplers) ||
!LoadObjectFromPrecompiled(num_bindless_samplers)) {
return {};
}
std::vector<ConstBufferKey> keys(num_keys);
std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) ||
!LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) !=
bound_samplers.size() ||
!LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size()) !=
bindless_samplers.size()) {
return {};
}
for (const auto& key : keys) {
usage.keys.insert({{key.cbuf, key.offset}, key.value});
}
for (const auto& key : bound_samplers) {
usage.bound_samplers.emplace(key.offset, key.sampler);
}
for (const auto& key : bindless_samplers) {
usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
}
ShaderDiskCacheDump dump;
if (!LoadObjectFromPrecompiled(dump.binary_format)) {
return {};
}
switch (kind) {
case PrecompiledEntryKind::Decompiled: {
u64 unique_identifier{};
if (!LoadObjectFromPrecompiled(unique_identifier)) {
return {};
}
auto entry = LoadDecompiledEntry();
if (!entry) {
return {};
}
decompiled.insert({unique_identifier, std::move(*entry)});
break;
}
case PrecompiledEntryKind::Dump: {
ShaderDiskCacheUsage usage;
if (!LoadObjectFromPrecompiled(usage)) {
return {};
}
ShaderDiskCacheDump dump;
if (!LoadObjectFromPrecompiled(dump.binary_format)) {
return {};
}
u32 binary_length{};
if (!LoadObjectFromPrecompiled(binary_length)) {
return {};
}
dump.binary.resize(binary_length);
if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
return {};
}
dumps.insert({usage, dump});
break;
}
default:
u32 binary_length{};
if (!LoadObjectFromPrecompiled(binary_length)) {
return {};
}
}
return {{decompiled, dumps}};
}
std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry() {
u32 code_size{};
if (!LoadObjectFromPrecompiled(code_size)) {
return {};
}
std::string code(code_size, '\0');
if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
return {};
}
ShaderDiskCacheDecompiled entry;
entry.code = std::move(code);
u32 const_buffers_count{};
if (!LoadObjectFromPrecompiled(const_buffers_count)) {
return {};
}
for (u32 i = 0; i < const_buffers_count; ++i) {
u32 max_offset{};
u32 index{};
bool is_indirect{};
if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(is_indirect)) {
dump.binary.resize(binary_length);
if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
return {};
}
entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index);
}
u32 samplers_count{};
if (!LoadObjectFromPrecompiled(samplers_count)) {
return {};
dumps.emplace(std::move(usage), dump);
}
for (u32 i = 0; i < samplers_count; ++i) {
u64 offset{};
u64 index{};
u32 type{};
bool is_array{};
bool is_shadow{};
bool is_bindless{};
if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) ||
!LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) {
return {};
}
entry.entries.samplers.emplace_back(
static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless);
}
u32 images_count{};
if (!LoadObjectFromPrecompiled(images_count)) {
return {};
}
for (u32 i = 0; i < images_count; ++i) {
u64 offset{};
u64 index{};
u32 type{};
u8 is_bindless{};
u8 is_written{};
u8 is_read{};
u8 is_atomic{};
if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) ||
!LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) ||
!LoadObjectFromPrecompiled(is_atomic)) {
return {};
}
entry.entries.images.emplace_back(
static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0,
is_read != 0, is_atomic != 0);
}
u32 global_memory_count{};
if (!LoadObjectFromPrecompiled(global_memory_count)) {
return {};
}
for (u32 i = 0; i < global_memory_count; ++i) {
u32 cbuf_index{};
u32 cbuf_offset{};
bool is_read{};
bool is_written{};
if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) ||
!LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
return {};
}
entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read,
is_written);
}
for (auto& clip_distance : entry.entries.clip_distances) {
if (!LoadObjectFromPrecompiled(clip_distance)) {
return {};
}
}
u64 shader_length{};
if (!LoadObjectFromPrecompiled(shader_length)) {
return {};
}
entry.entries.shader_length = static_cast<std::size_t>(shader_length);
return entry;
}
bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std::string& code,
const GLShader::ShaderEntries& entries) {
if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Decompiled)) ||
!SaveObjectToPrecompiled(unique_identifier) ||
!SaveObjectToPrecompiled(static_cast<u32>(code.size())) ||
!SaveArrayToPrecompiled(code.data(), code.size())) {
return false;
}
if (!SaveObjectToPrecompiled(static_cast<u32>(entries.const_buffers.size()))) {
return false;
}
for (const auto& cbuf : entries.const_buffers) {
if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) ||
!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) ||
!SaveObjectToPrecompiled(cbuf.IsIndirect())) {
return false;
}
}
if (!SaveObjectToPrecompiled(static_cast<u32>(entries.samplers.size()))) {
return false;
}
for (const auto& sampler : entries.samplers) {
if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) ||
!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) ||
!SaveObjectToPrecompiled(sampler.IsArray()) ||
!SaveObjectToPrecompiled(sampler.IsShadow()) ||
!SaveObjectToPrecompiled(sampler.IsBindless())) {
return false;
}
}
if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) {
return false;
}
for (const auto& image : entries.images) {
if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) ||
!SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) ||
!SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) ||
!SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) ||
!SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) ||
!SaveObjectToPrecompiled(static_cast<u8>(image.IsAtomic() ? 1 : 0))) {
return false;
}
}
if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) {
return false;
}
for (const auto& gmem : entries.global_memory_entries) {
if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) ||
!SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) {
return false;
}
}
for (const bool clip_distance : entries.clip_distances) {
if (!SaveObjectToPrecompiled(clip_distance)) {
return false;
}
}
if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) {
return false;
}
return true;
return dumps;
}
void ShaderDiskCacheOpenGL::InvalidateTransferable() {
@@ -494,10 +357,11 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
}
FileUtil::IOFile file = AppendTransferableFile();
if (!file.IsOpen())
if (!file.IsOpen()) {
return;
}
if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) {
LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry - removing");
LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
file.Close();
InvalidateTransferable();
return;
@@ -523,29 +387,39 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
FileUtil::IOFile file = AppendTransferableFile();
if (!file.IsOpen())
return;
if (file.WriteObject(TransferableEntryKind::Usage) != 1 || file.WriteObject(usage) != 1) {
LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry - removing");
const auto Close = [&] {
LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing");
file.Close();
InvalidateTransferable();
};
if (file.WriteObject(TransferableEntryKind::Usage) != 1 ||
file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 ||
file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 ||
file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 ||
file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) {
Close();
return;
}
}
void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::string& code,
const GLShader::ShaderEntries& entries) {
if (!is_usable) {
return;
for (const auto& [pair, value] : usage.keys) {
const auto [cbuf, offset] = pair;
if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) {
Close();
return;
}
}
if (precompiled_cache_virtual_file.GetSize() == 0) {
SavePrecompiledHeaderToVirtualPrecompiledCache();
for (const auto& [offset, sampler] : usage.bound_samplers) {
if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) {
Close();
return;
}
}
if (!SaveDecompiledFile(unique_identifier, code, entries)) {
LOG_ERROR(Render_OpenGL,
"Failed to save decompiled entry to the precompiled file - removing");
InvalidatePrecompiled();
for (const auto& [pair, sampler] : usage.bindless_samplers) {
const auto [cbuf, offset] = pair;
if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
Close();
return;
}
}
}
@@ -554,6 +428,13 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
return;
}
// TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header
// when writing the dump. This should be done the moment I get access to write to the virtual
// file.
if (precompiled_cache_virtual_file.GetSize() == 0) {
SavePrecompiledHeaderToVirtualPrecompiledCache();
}
GLint binary_length{};
glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
@@ -561,21 +442,51 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
std::vector<u8> binary(binary_length);
glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Dump)) ||
!SaveObjectToPrecompiled(usage) ||
!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
!SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
!SaveArrayToPrecompiled(binary.data(), binary.size())) {
LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing",
const auto Close = [&] {
LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
usage.unique_identifier);
InvalidatePrecompiled();
};
if (!SaveObjectToPrecompiled(usage.unique_identifier) ||
!SaveObjectToPrecompiled(usage.variant) ||
!SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) ||
!SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) ||
!SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) {
Close();
return;
}
for (const auto& [pair, value] : usage.keys) {
const auto [cbuf, offset] = pair;
if (SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value}) != 1) {
Close();
return;
}
}
for (const auto& [offset, sampler] : usage.bound_samplers) {
if (SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler}) != 1) {
Close();
return;
}
}
for (const auto& [pair, sampler] : usage.bindless_samplers) {
const auto [cbuf, offset] = pair;
if (SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
Close();
return;
}
}
if (!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
!SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
!SaveArrayToPrecompiled(binary.data(), binary.size())) {
Close();
}
}
FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
if (!EnsureDirectories())
if (!EnsureDirectories()) {
return {};
}
const auto transferable_path{GetTransferablePath()};
const bool existed = FileUtil::Exists(transferable_path);
@@ -607,8 +518,8 @@ void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
precompiled_cache_virtual_file_offset = 0;
const std::vector<u8>& uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
const std::vector<u8>& compressed =
const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
const std::vector<u8> compressed =
Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
const auto precompiled_path{GetPrecompiledPath()};

View File

@@ -8,6 +8,7 @@
#include <optional>
#include <string>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
@@ -19,6 +20,7 @@
#include "common/common_types.h"
#include "core/file_sys/vfs_vector.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/shader/const_buffer_locker.h"
namespace Core {
class System;
@@ -53,6 +55,7 @@ struct BaseBindings {
return !operator==(rhs);
}
};
static_assert(std::is_trivially_copyable_v<BaseBindings>);
/// Describes the different variants a single program can be compiled.
struct ProgramVariant {
@@ -70,13 +73,20 @@ struct ProgramVariant {
}
};
static_assert(std::is_trivially_copyable_v<ProgramVariant>);
/// Describes how a shader is used.
struct ShaderDiskCacheUsage {
u64 unique_identifier{};
ProgramVariant variant;
VideoCommon::Shader::KeyMap keys;
VideoCommon::Shader::BoundSamplerMap bound_samplers;
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
bool operator==(const ShaderDiskCacheUsage& rhs) const {
return std::tie(unique_identifier, variant) == std::tie(rhs.unique_identifier, rhs.variant);
return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) ==
std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers,
rhs.bindless_samplers);
}
bool operator!=(const ShaderDiskCacheUsage& rhs) const {
@@ -123,8 +133,7 @@ namespace OpenGL {
class ShaderDiskCacheRaw {
public:
explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
u32 program_code_size, u32 program_code_size_b,
ProgramCode program_code, ProgramCode program_code_b);
ProgramCode program_code, ProgramCode program_code_b = {});
ShaderDiskCacheRaw();
~ShaderDiskCacheRaw();
@@ -155,22 +164,14 @@ public:
private:
u64 unique_identifier{};
ProgramType program_type{};
u32 program_code_size{};
u32 program_code_size_b{};
ProgramCode program_code;
ProgramCode program_code_b;
};
/// Contains decompiled data from a shader
struct ShaderDiskCacheDecompiled {
std::string code;
GLShader::ShaderEntries entries;
};
/// Contains an OpenGL dumped binary program
struct ShaderDiskCacheDump {
GLenum binary_format;
GLenum binary_format{};
std::vector<u8> binary;
};
@@ -184,9 +185,7 @@ public:
LoadTransferable();
/// Loads current game's precompiled cache. Invalidates on failure.
std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
LoadPrecompiled();
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled();
/// Removes the transferable (and precompiled) cache file.
void InvalidateTransferable();
@@ -200,10 +199,6 @@ public:
/// Saves shader usage to the transferable file. Does not check for collisions.
void SaveUsage(const ShaderDiskCacheUsage& usage);
/// Saves a decompiled entry to the precompiled file. Does not check for collisions.
void SaveDecompiled(u64 unique_identifier, const std::string& code,
const GLShader::ShaderEntries& entries);
/// Saves a dump entry to the precompiled file. Does not check for collisions.
void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);
@@ -212,18 +207,9 @@ public:
private:
/// Loads the transferable cache. Returns empty on failure.
std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
LoadPrecompiledFile(FileUtil::IOFile& file);
/// Loads a decompiled cache entry from m_precompiled_cache_virtual_file. Returns empty on
/// failure.
std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry();
/// Saves a decompiled entry to the passed file. Returns true on success.
bool SaveDecompiledFile(u64 unique_identifier, const std::string& code,
const GLShader::ShaderEntries& entries);
/// Opens current game's transferable file and write it's header if it doesn't exist
FileUtil::IOFile AppendTransferableFile() const;

View File

@@ -16,93 +16,51 @@ using VideoCommon::Shader::CompilerSettings;
using VideoCommon::Shader::ProgramCode;
using VideoCommon::Shader::ShaderIR;
static constexpr u32 PROGRAM_OFFSET = 10;
static constexpr u32 COMPUTE_OFFSET = 0;
static constexpr CompilerSettings settings{CompileDepth::NoFlowStack, true};
ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "// Shader Unique Id: VS" + id + "\n\n";
out += GetCommonDeclarations();
std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) {
std::string out = GetCommonDeclarations();
out += R"(
layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
float y_direction;
};
)";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings);
const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB;
ProgramResult program = Decompile(device, program_ir, stage, "vertex");
out += program.first;
if (setup.IsDualProgram()) {
const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b,
settings);
ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b");
out += program_b.first;
const auto stage = ir_b ? ProgramType::VertexA : ProgramType::VertexB;
out += Decompile(device, ir, stage, "vertex");
if (ir_b) {
out += Decompile(device, *ir_b, ProgramType::VertexB, "vertex_b");
}
out += R"(
void main() {
execute_vertex();
)";
if (setup.IsDualProgram()) {
if (ir_b) {
out += " execute_vertex_b();";
}
out += R"(
// Set Position Y direction
gl_Position.y *= utof(config_pack[2]);
// Check if the flip stage is VertexB
// Config pack's second value is flip_stage
if (config_pack[1] == 1) {
// Viewport can be flipped, which is unsupported by glViewport
gl_Position.xy *= viewport_flip.xy;
}
})";
return {std::move(out), std::move(program.second)};
out += "}\n";
return out;
}
ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "// Shader Unique Id: GS" + id + "\n\n";
out += GetCommonDeclarations();
std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) {
std::string out = GetCommonDeclarations();
out += R"(
layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
float y_direction;
};
)";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings);
ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry");
out += program.first;
out += Decompile(device, ir, ProgramType::Geometry, "geometry");
out += R"(
void main() {
execute_geometry();
};)";
return {std::move(out), std::move(program.second)};
}
)";
return out;
}
ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "// Shader Unique Id: FS" + id + "\n\n";
out += GetCommonDeclarations();
std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) {
std::string out = GetCommonDeclarations();
out += R"(
layout (location = 0) out vec4 FragColor0;
layout (location = 1) out vec4 FragColor1;
@@ -114,41 +72,29 @@ layout (location = 6) out vec4 FragColor6;
layout (location = 7) out vec4 FragColor7;
layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
float y_direction;
};
)";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings);
ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment");
out += program.first;
out += Decompile(device, ir, ProgramType::Fragment, "fragment");
out += R"(
void main() {
execute_fragment();
}
)";
return {std::move(out), std::move(program.second)};
return out;
}
ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "// Shader Unique Id: CS" + id + "\n\n";
out += GetCommonDeclarations();
const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a, settings);
ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute");
out += program.first;
std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) {
std::string out = GetCommonDeclarations();
out += Decompile(device, ir, ProgramType::Compute, "compute");
out += R"(
void main() {
execute_compute();
}
)";
return {std::move(out), std::move(program.second)};
return out;
}
} // namespace OpenGL::GLShader

View File

@@ -17,44 +17,18 @@ class Device;
namespace OpenGL::GLShader {
using VideoCommon::Shader::ProgramCode;
struct ShaderSetup {
explicit ShaderSetup(ProgramCode program_code) {
program.code = std::move(program_code);
}
struct {
ProgramCode code;
ProgramCode code_b; // Used for dual vertex shaders
u64 unique_identifier;
std::size_t size_a;
std::size_t size_b;
} program;
/// Used in scenarios where we have a dual vertex shaders
void SetProgramB(ProgramCode program_b) {
program.code_b = std::move(program_b);
has_program_b = true;
}
bool IsDualProgram() const {
return has_program_b;
}
private:
bool has_program_b{};
};
using VideoCommon::Shader::ShaderIR;
/// Generates the GLSL vertex shader program source code for the given VS program
ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup);
std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b);
/// Generates the GLSL geometry shader program source code for the given GS program
ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup);
std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir);
/// Generates the GLSL fragment shader program source code for the given FS program
ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup);
std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir);
/// Generates the GLSL compute shader program source code for the given CS program
ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup);
std::string GenerateComputeShader(const Device& device, const ShaderIR& ir);
} // namespace OpenGL::GLShader

View File

@@ -40,27 +40,11 @@ void ProgramManager::UpdatePipeline() {
old_state = current_state;
}
void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) {
void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell) {
const auto& regs = maxwell.regs;
const auto& state = maxwell.state;
// TODO(bunnei): Support more than one viewport
viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f;
instance_id = state.current_instance;
// Assign in which stage the position has to be flipped
// (the last stage before the fragment shader).
constexpr u32 geometry_index = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
if (maxwell.regs.shader_config[geometry_index].enable) {
flip_stage = geometry_index;
} else {
flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB);
}
// Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
y_direction = regs.screen_y_control.y_negate == 0 ? 1.f : -1.f;
y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
}
} // namespace OpenGL::GLShader

View File

@@ -18,17 +18,12 @@ namespace OpenGL::GLShader {
/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
/// Not following that rule will cause problems on some AMD drivers.
struct MaxwellUniformData {
void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell, std::size_t shader_stage);
struct alignas(16) MaxwellUniformData {
void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell);
alignas(16) GLvec4 viewport_flip;
struct alignas(16) {
GLuint instance_id;
GLuint flip_stage;
GLfloat y_direction;
};
GLfloat y_direction;
};
static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure size is incorrect");
static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
static_assert(sizeof(MaxwellUniformData) < 16384,
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");

View File

@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <iterator>
#include <glad/glad.h>
#include "common/assert.h"
@@ -69,147 +70,29 @@ void Enable(GLenum cap, GLuint index, bool enable) {
}
void Enable(GLenum cap, bool& current_value, bool new_value) {
if (UpdateValue(current_value, new_value))
if (UpdateValue(current_value, new_value)) {
Enable(cap, new_value);
}
}
void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) {
if (UpdateValue(current_value, new_value))
if (UpdateValue(current_value, new_value)) {
Enable(cap, index, new_value);
}
}
} // namespace
} // Anonymous namespace
OpenGLState::OpenGLState() {
// These all match default OpenGL values
framebuffer_srgb.enabled = false;
multisample_control.alpha_to_coverage = false;
multisample_control.alpha_to_one = false;
cull.enabled = false;
cull.mode = GL_BACK;
cull.front_face = GL_CCW;
depth.test_enabled = false;
depth.test_func = GL_LESS;
depth.write_mask = GL_TRUE;
primitive_restart.enabled = false;
primitive_restart.index = 0;
for (auto& item : color_mask) {
item.red_enabled = GL_TRUE;
item.green_enabled = GL_TRUE;
item.blue_enabled = GL_TRUE;
item.alpha_enabled = GL_TRUE;
}
const auto ResetStencil = [](auto& config) {
config.test_func = GL_ALWAYS;
config.test_ref = 0;
config.test_mask = 0xFFFFFFFF;
config.write_mask = 0xFFFFFFFF;
config.action_depth_fail = GL_KEEP;
config.action_depth_pass = GL_KEEP;
config.action_stencil_fail = GL_KEEP;
};
stencil.test_enabled = false;
ResetStencil(stencil.front);
ResetStencil(stencil.back);
for (auto& item : viewports) {
item.x = 0;
item.y = 0;
item.width = 0;
item.height = 0;
item.depth_range_near = 0.0f;
item.depth_range_far = 1.0f;
item.scissor.enabled = false;
item.scissor.x = 0;
item.scissor.y = 0;
item.scissor.width = 0;
item.scissor.height = 0;
}
for (auto& item : blend) {
item.enabled = true;
item.rgb_equation = GL_FUNC_ADD;
item.a_equation = GL_FUNC_ADD;
item.src_rgb_func = GL_ONE;
item.dst_rgb_func = GL_ZERO;
item.src_a_func = GL_ONE;
item.dst_a_func = GL_ZERO;
}
independant_blend.enabled = false;
blend_color.red = 0.0f;
blend_color.green = 0.0f;
blend_color.blue = 0.0f;
blend_color.alpha = 0.0f;
logic_op.enabled = false;
logic_op.operation = GL_COPY;
draw.read_framebuffer = 0;
draw.draw_framebuffer = 0;
draw.vertex_array = 0;
draw.shader_program = 0;
draw.program_pipeline = 0;
clip_distance = {};
point.size = 1;
fragment_color_clamp.enabled = false;
depth_clamp.far_plane = false;
depth_clamp.near_plane = false;
polygon_offset.fill_enable = false;
polygon_offset.line_enable = false;
polygon_offset.point_enable = false;
polygon_offset.factor = 0.0f;
polygon_offset.units = 0.0f;
polygon_offset.clamp = 0.0f;
alpha_test.enabled = false;
alpha_test.func = GL_ALWAYS;
alpha_test.ref = 0.0f;
}
OpenGLState::OpenGLState() = default;
void OpenGLState::SetDefaultViewports() {
for (auto& item : viewports) {
item.x = 0;
item.y = 0;
item.width = 0;
item.height = 0;
item.depth_range_near = 0.0f;
item.depth_range_far = 1.0f;
item.scissor.enabled = false;
item.scissor.x = 0;
item.scissor.y = 0;
item.scissor.width = 0;
item.scissor.height = 0;
}
viewports.fill(Viewport{});
depth_clamp.far_plane = false;
depth_clamp.near_plane = false;
}
void OpenGLState::ApplyDefaultState() {
glEnable(GL_BLEND);
glDisable(GL_FRAMEBUFFER_SRGB);
glDisable(GL_CULL_FACE);
glDisable(GL_DEPTH_TEST);
glDisable(GL_PRIMITIVE_RESTART);
glDisable(GL_STENCIL_TEST);
glDisable(GL_COLOR_LOGIC_OP);
glDisable(GL_SCISSOR_TEST);
}
void OpenGLState::ApplyFramebufferState() const {
void OpenGLState::ApplyFramebufferState() {
if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) {
glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
}
@@ -218,52 +101,52 @@ void OpenGLState::ApplyFramebufferState() const {
}
}
void OpenGLState::ApplyVertexArrayState() const {
void OpenGLState::ApplyVertexArrayState() {
if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) {
glBindVertexArray(draw.vertex_array);
}
}
void OpenGLState::ApplyShaderProgram() const {
void OpenGLState::ApplyShaderProgram() {
if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) {
glUseProgram(draw.shader_program);
}
}
void OpenGLState::ApplyProgramPipeline() const {
void OpenGLState::ApplyProgramPipeline() {
if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) {
glBindProgramPipeline(draw.program_pipeline);
}
}
void OpenGLState::ApplyClipDistances() const {
void OpenGLState::ApplyClipDistances() {
for (std::size_t i = 0; i < clip_distance.size(); ++i) {
Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i],
clip_distance[i]);
}
}
void OpenGLState::ApplyPointSize() const {
void OpenGLState::ApplyPointSize() {
if (UpdateValue(cur_state.point.size, point.size)) {
glPointSize(point.size);
}
}
void OpenGLState::ApplyFragmentColorClamp() const {
void OpenGLState::ApplyFragmentColorClamp() {
if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) {
glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
}
}
void OpenGLState::ApplyMultisample() const {
void OpenGLState::ApplyMultisample() {
Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage,
multisample_control.alpha_to_coverage);
Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one,
multisample_control.alpha_to_one);
}
void OpenGLState::ApplyDepthClamp() const {
void OpenGLState::ApplyDepthClamp() {
if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
return;
@@ -276,7 +159,7 @@ void OpenGLState::ApplyDepthClamp() const {
Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane);
}
void OpenGLState::ApplySRgb() const {
void OpenGLState::ApplySRgb() {
if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled)
return;
cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled;
@@ -287,7 +170,7 @@ void OpenGLState::ApplySRgb() const {
}
}
void OpenGLState::ApplyCulling() const {
void OpenGLState::ApplyCulling() {
Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled);
if (UpdateValue(cur_state.cull.mode, cull.mode)) {
@@ -299,7 +182,12 @@ void OpenGLState::ApplyCulling() const {
}
}
void OpenGLState::ApplyColorMask() const {
void OpenGLState::ApplyColorMask() {
if (!dirty.color_mask) {
return;
}
dirty.color_mask = false;
for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
const auto& updated = color_mask[i];
auto& current = cur_state.color_mask[i];
@@ -314,7 +202,7 @@ void OpenGLState::ApplyColorMask() const {
}
}
void OpenGLState::ApplyDepth() const {
void OpenGLState::ApplyDepth() {
Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled);
if (cur_state.depth.test_func != depth.test_func) {
@@ -328,7 +216,7 @@ void OpenGLState::ApplyDepth() const {
}
}
void OpenGLState::ApplyPrimitiveRestart() const {
void OpenGLState::ApplyPrimitiveRestart() {
Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled);
if (cur_state.primitive_restart.index != primitive_restart.index) {
@@ -337,7 +225,12 @@ void OpenGLState::ApplyPrimitiveRestart() const {
}
}
void OpenGLState::ApplyStencilTest() const {
void OpenGLState::ApplyStencilTest() {
if (!dirty.stencil_state) {
return;
}
dirty.stencil_state = false;
Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled);
const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) {
@@ -366,7 +259,7 @@ void OpenGLState::ApplyStencilTest() const {
ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
}
void OpenGLState::ApplyViewport() const {
void OpenGLState::ApplyViewport() {
for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) {
const auto& updated = viewports[i];
auto& current = cur_state.viewports[i];
@@ -403,7 +296,7 @@ void OpenGLState::ApplyViewport() const {
}
}
void OpenGLState::ApplyGlobalBlending() const {
void OpenGLState::ApplyGlobalBlending() {
const Blend& updated = blend[0];
Blend& current = cur_state.blend[0];
@@ -427,7 +320,7 @@ void OpenGLState::ApplyGlobalBlending() const {
}
}
void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) {
const Blend& updated = blend[target];
Blend& current = cur_state.blend[target];
@@ -451,7 +344,12 @@ void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
}
}
void OpenGLState::ApplyBlending() const {
void OpenGLState::ApplyBlending() {
if (!dirty.blend_state) {
return;
}
dirty.blend_state = false;
if (independant_blend.enabled) {
const bool force = independant_blend.enabled != cur_state.independant_blend.enabled;
for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) {
@@ -470,7 +368,7 @@ void OpenGLState::ApplyBlending() const {
}
}
void OpenGLState::ApplyLogicOp() const {
void OpenGLState::ApplyLogicOp() {
Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled);
if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) {
@@ -478,7 +376,12 @@ void OpenGLState::ApplyLogicOp() const {
}
}
void OpenGLState::ApplyPolygonOffset() const {
void OpenGLState::ApplyPolygonOffset() {
if (!dirty.polygon_offset) {
return;
}
dirty.polygon_offset = false;
Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable,
polygon_offset.fill_enable);
Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable,
@@ -499,7 +402,7 @@ void OpenGLState::ApplyPolygonOffset() const {
}
}
void OpenGLState::ApplyAlphaTest() const {
void OpenGLState::ApplyAlphaTest() {
Enable(GL_ALPHA_TEST, cur_state.alpha_test.enabled, alpha_test.enabled);
if (UpdateTie(std::tie(cur_state.alpha_test.func, cur_state.alpha_test.ref),
std::tie(alpha_test.func, alpha_test.ref))) {
@@ -507,19 +410,25 @@ void OpenGLState::ApplyAlphaTest() const {
}
}
void OpenGLState::ApplyTextures() const {
void OpenGLState::ApplyClipControl() {
if (UpdateValue(cur_state.clip_control.origin, clip_control.origin)) {
glClipControl(clip_control.origin, GL_NEGATIVE_ONE_TO_ONE);
}
}
void OpenGLState::ApplyTextures() {
if (const auto update = UpdateArray(cur_state.textures, textures)) {
glBindTextures(update->first, update->second, textures.data() + update->first);
}
}
void OpenGLState::ApplySamplers() const {
void OpenGLState::ApplySamplers() {
if (const auto update = UpdateArray(cur_state.samplers, samplers)) {
glBindSamplers(update->first, update->second, samplers.data() + update->first);
}
}
void OpenGLState::ApplyImages() const {
void OpenGLState::ApplyImages() {
if (const auto update = UpdateArray(cur_state.images, images)) {
glBindImageTextures(update->first, update->second, images.data() + update->first);
}
@@ -535,33 +444,22 @@ void OpenGLState::Apply() {
ApplyPointSize();
ApplyFragmentColorClamp();
ApplyMultisample();
if (dirty.color_mask) {
ApplyColorMask();
dirty.color_mask = false;
}
ApplyColorMask();
ApplyDepthClamp();
ApplyViewport();
if (dirty.stencil_state) {
ApplyStencilTest();
dirty.stencil_state = false;
}
ApplyStencilTest();
ApplySRgb();
ApplyCulling();
ApplyDepth();
ApplyPrimitiveRestart();
if (dirty.blend_state) {
ApplyBlending();
dirty.blend_state = false;
}
ApplyBlending();
ApplyLogicOp();
ApplyTextures();
ApplySamplers();
ApplyImages();
if (dirty.polygon_offset) {
ApplyPolygonOffset();
dirty.polygon_offset = false;
}
ApplyPolygonOffset();
ApplyAlphaTest();
ApplyClipControl();
}
void OpenGLState::EmulateViewportWithScissor() {

View File

@@ -5,168 +5,150 @@
#pragma once
#include <array>
#include <type_traits>
#include <glad/glad.h>
#include "video_core/engines/maxwell_3d.h"
namespace OpenGL {
namespace TextureUnits {
struct TextureUnit {
GLint id;
constexpr GLenum Enum() const {
return static_cast<GLenum>(GL_TEXTURE0 + id);
}
};
constexpr TextureUnit MaxwellTexture(int unit) {
return TextureUnit{unit};
}
constexpr TextureUnit LightingLUT{3};
constexpr TextureUnit FogLUT{4};
constexpr TextureUnit ProcTexNoiseLUT{5};
constexpr TextureUnit ProcTexColorMap{6};
constexpr TextureUnit ProcTexAlphaMap{7};
constexpr TextureUnit ProcTexLUT{8};
constexpr TextureUnit ProcTexDiffLUT{9};
} // namespace TextureUnits
class OpenGLState {
public:
struct {
bool enabled; // GL_FRAMEBUFFER_SRGB
bool enabled = false; // GL_FRAMEBUFFER_SRGB
} framebuffer_srgb;
struct {
bool alpha_to_coverage; // GL_ALPHA_TO_COVERAGE
bool alpha_to_one; // GL_ALPHA_TO_ONE
bool alpha_to_coverage = false; // GL_ALPHA_TO_COVERAGE
bool alpha_to_one = false; // GL_ALPHA_TO_ONE
} multisample_control;
struct {
bool enabled; // GL_CLAMP_FRAGMENT_COLOR_ARB
bool enabled = false; // GL_CLAMP_FRAGMENT_COLOR_ARB
} fragment_color_clamp;
struct {
bool far_plane;
bool near_plane;
bool far_plane = false;
bool near_plane = false;
} depth_clamp; // GL_DEPTH_CLAMP
struct {
bool enabled; // GL_CULL_FACE
GLenum mode; // GL_CULL_FACE_MODE
GLenum front_face; // GL_FRONT_FACE
bool enabled = false; // GL_CULL_FACE
GLenum mode = GL_BACK; // GL_CULL_FACE_MODE
GLenum front_face = GL_CCW; // GL_FRONT_FACE
} cull;
struct {
bool test_enabled; // GL_DEPTH_TEST
GLenum test_func; // GL_DEPTH_FUNC
GLboolean write_mask; // GL_DEPTH_WRITEMASK
bool test_enabled = false; // GL_DEPTH_TEST
GLboolean write_mask = GL_TRUE; // GL_DEPTH_WRITEMASK
GLenum test_func = GL_LESS; // GL_DEPTH_FUNC
} depth;
struct {
bool enabled;
GLuint index;
bool enabled = false;
GLuint index = 0;
} primitive_restart; // GL_PRIMITIVE_RESTART
struct ColorMask {
GLboolean red_enabled;
GLboolean green_enabled;
GLboolean blue_enabled;
GLboolean alpha_enabled;
GLboolean red_enabled = GL_TRUE;
GLboolean green_enabled = GL_TRUE;
GLboolean blue_enabled = GL_TRUE;
GLboolean alpha_enabled = GL_TRUE;
};
std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
color_mask; // GL_COLOR_WRITEMASK
struct {
bool test_enabled; // GL_STENCIL_TEST
bool test_enabled = false; // GL_STENCIL_TEST
struct {
GLenum test_func; // GL_STENCIL_FUNC
GLint test_ref; // GL_STENCIL_REF
GLuint test_mask; // GL_STENCIL_VALUE_MASK
GLuint write_mask; // GL_STENCIL_WRITEMASK
GLenum action_stencil_fail; // GL_STENCIL_FAIL
GLenum action_depth_fail; // GL_STENCIL_PASS_DEPTH_FAIL
GLenum action_depth_pass; // GL_STENCIL_PASS_DEPTH_PASS
GLenum test_func = GL_ALWAYS; // GL_STENCIL_FUNC
GLint test_ref = 0; // GL_STENCIL_REF
GLuint test_mask = 0xFFFFFFFF; // GL_STENCIL_VALUE_MASK
GLuint write_mask = 0xFFFFFFFF; // GL_STENCIL_WRITEMASK
GLenum action_stencil_fail = GL_KEEP; // GL_STENCIL_FAIL
GLenum action_depth_fail = GL_KEEP; // GL_STENCIL_PASS_DEPTH_FAIL
GLenum action_depth_pass = GL_KEEP; // GL_STENCIL_PASS_DEPTH_PASS
} front, back;
} stencil;
struct Blend {
bool enabled; // GL_BLEND
GLenum rgb_equation; // GL_BLEND_EQUATION_RGB
GLenum a_equation; // GL_BLEND_EQUATION_ALPHA
GLenum src_rgb_func; // GL_BLEND_SRC_RGB
GLenum dst_rgb_func; // GL_BLEND_DST_RGB
GLenum src_a_func; // GL_BLEND_SRC_ALPHA
GLenum dst_a_func; // GL_BLEND_DST_ALPHA
bool enabled = false; // GL_BLEND
GLenum rgb_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_RGB
GLenum a_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_ALPHA
GLenum src_rgb_func = GL_ONE; // GL_BLEND_SRC_RGB
GLenum dst_rgb_func = GL_ZERO; // GL_BLEND_DST_RGB
GLenum src_a_func = GL_ONE; // GL_BLEND_SRC_ALPHA
GLenum dst_a_func = GL_ZERO; // GL_BLEND_DST_ALPHA
};
std::array<Blend, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> blend;
struct {
bool enabled;
bool enabled = false;
} independant_blend;
struct {
GLclampf red;
GLclampf green;
GLclampf blue;
GLclampf alpha;
GLclampf red = 0.0f;
GLclampf green = 0.0f;
GLclampf blue = 0.0f;
GLclampf alpha = 0.0f;
} blend_color; // GL_BLEND_COLOR
struct {
bool enabled; // GL_LOGIC_OP_MODE
GLenum operation;
bool enabled = false; // GL_LOGIC_OP_MODE
GLenum operation = GL_COPY;
} logic_op;
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures{};
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers{};
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images{};
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures = {};
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers = {};
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images = {};
struct {
GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING
GLuint shader_program; // GL_CURRENT_PROGRAM
GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING
GLuint read_framebuffer = 0; // GL_READ_FRAMEBUFFER_BINDING
GLuint draw_framebuffer = 0; // GL_DRAW_FRAMEBUFFER_BINDING
GLuint vertex_array = 0; // GL_VERTEX_ARRAY_BINDING
GLuint shader_program = 0; // GL_CURRENT_PROGRAM
GLuint program_pipeline = 0; // GL_PROGRAM_PIPELINE_BINDING
} draw;
struct viewport {
GLint x;
GLint y;
GLint width;
GLint height;
GLfloat depth_range_near; // GL_DEPTH_RANGE
GLfloat depth_range_far; // GL_DEPTH_RANGE
struct Viewport {
GLint x = 0;
GLint y = 0;
GLint width = 0;
GLint height = 0;
GLfloat depth_range_near = 0.0f; // GL_DEPTH_RANGE
GLfloat depth_range_far = 1.0f; // GL_DEPTH_RANGE
struct {
bool enabled; // GL_SCISSOR_TEST
GLint x;
GLint y;
GLsizei width;
GLsizei height;
bool enabled = false; // GL_SCISSOR_TEST
GLint x = 0;
GLint y = 0;
GLsizei width = 0;
GLsizei height = 0;
} scissor;
};
std::array<viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports;
std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports;
struct {
float size; // GL_POINT_SIZE
float size = 1.0f; // GL_POINT_SIZE
} point;
struct {
bool point_enable;
bool line_enable;
bool fill_enable;
GLfloat units;
GLfloat factor;
GLfloat clamp;
bool point_enable = false;
bool line_enable = false;
bool fill_enable = false;
GLfloat units = 0.0f;
GLfloat factor = 0.0f;
GLfloat clamp = 0.0f;
} polygon_offset;
struct {
bool enabled; // GL_ALPHA_TEST
GLenum func; // GL_ALPHA_TEST_FUNC
GLfloat ref; // GL_ALPHA_TEST_REF
bool enabled = false; // GL_ALPHA_TEST
GLenum func = GL_ALWAYS; // GL_ALPHA_TEST_FUNC
GLfloat ref = 0.0f; // GL_ALPHA_TEST_REF
} alpha_test;
std::array<bool, 8> clip_distance; // GL_CLIP_DISTANCE
std::array<bool, 8> clip_distance = {}; // GL_CLIP_DISTANCE
struct {
GLenum origin = GL_LOWER_LEFT;
} clip_control;
OpenGLState();
@@ -179,34 +161,32 @@ public:
/// Apply this state as the current OpenGL state
void Apply();
void ApplyFramebufferState() const;
void ApplyVertexArrayState() const;
void ApplyShaderProgram() const;
void ApplyProgramPipeline() const;
void ApplyClipDistances() const;
void ApplyPointSize() const;
void ApplyFragmentColorClamp() const;
void ApplyMultisample() const;
void ApplySRgb() const;
void ApplyCulling() const;
void ApplyColorMask() const;
void ApplyDepth() const;
void ApplyPrimitiveRestart() const;
void ApplyStencilTest() const;
void ApplyViewport() const;
void ApplyTargetBlending(std::size_t target, bool force) const;
void ApplyGlobalBlending() const;
void ApplyBlending() const;
void ApplyLogicOp() const;
void ApplyTextures() const;
void ApplySamplers() const;
void ApplyImages() const;
void ApplyDepthClamp() const;
void ApplyPolygonOffset() const;
void ApplyAlphaTest() const;
/// Set the initial OpenGL state
static void ApplyDefaultState();
void ApplyFramebufferState();
void ApplyVertexArrayState();
void ApplyShaderProgram();
void ApplyProgramPipeline();
void ApplyClipDistances();
void ApplyPointSize();
void ApplyFragmentColorClamp();
void ApplyMultisample();
void ApplySRgb();
void ApplyCulling();
void ApplyColorMask();
void ApplyDepth();
void ApplyPrimitiveRestart();
void ApplyStencilTest();
void ApplyViewport();
void ApplyTargetBlending(std::size_t target, bool force);
void ApplyGlobalBlending();
void ApplyBlending();
void ApplyLogicOp();
void ApplyTextures();
void ApplySamplers();
void ApplyImages();
void ApplyDepthClamp();
void ApplyPolygonOffset();
void ApplyAlphaTest();
void ApplyClipControl();
/// Resets any references to the given resource
OpenGLState& UnbindTexture(GLuint handle);
@@ -253,5 +233,6 @@ private:
bool color_mask;
} dirty{};
};
static_assert(std::is_trivially_copyable_v<OpenGLState>);
} // namespace OpenGL

View File

@@ -131,6 +131,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X6_SRGB
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X5
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X5_SRGB
{GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV, ComponentType::Float, false}, // E5B9G9R9F
// Depth formats
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F

View File

@@ -54,11 +54,13 @@ in vec2 frag_tex_coord;
out vec4 color;
uniform sampler2D color_texture;
uniform vec4 backlight;
void main() {
// Swap RGBA -> ABGR so we don't have to do this on the CPU. This needs to change if we have to
// support more framebuffer pixel formats.
color = texture(color_texture, frag_tex_coord);
// Also multiply the color by the backlight multiplier supplied.
color = texture(color_texture, frag_tex_coord) * backlight;
}
)";
@@ -121,8 +123,13 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
// Load the framebuffer from memory, draw it to the screen, and swap buffers
LoadFBToScreenInfo(*framebuffer);
if (renderer_settings.screenshot_requested)
if (renderer_settings.screenshot_requested) {
CaptureScreenshot();
}
if (renderer_settings.backlight_fade_time > 0) {
UpdateBacklight();
}
DrawScreen(render_window.GetFramebufferLayout());
@@ -205,9 +212,13 @@ void RendererOpenGL::InitOpenGLObjects() {
state.Apply();
uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
uniform_backlight = glGetUniformLocation(shader.handle, "backlight");
attrib_position = glGetAttribLocation(shader.handle, "vert_position");
attrib_tex_coord = glGetAttribLocation(shader.handle, "vert_tex_coord");
// Initialize backlight
glUniform4f(uniform_backlight, 1.f, 1.f, 1.f, 1.f);
// Generate VBO handle for drawing
vertex_buffer.Create();
@@ -416,6 +427,29 @@ void RendererOpenGL::CaptureScreenshot() {
renderer_settings.screenshot_requested = false;
}
void RendererOpenGL::UpdateBacklight() {
constexpr u64 PER_FRAME_FADE_TIME = 1000000000.0f / 60;
const auto fade_time = renderer_settings.backlight_fade_time.load(std::memory_order_relaxed);
auto value = renderer_settings.current_brightness.load(std::memory_order_relaxed);
if (fade_time <= PER_FRAME_FADE_TIME) {
glUniform4f(uniform_backlight, value, value, value, value);
renderer_settings.backlight_fade_time = 0;
fade_time_max = 0;
} else {
if (fade_time_max == 0) {
fade_time_max = fade_time;
value_max = value;
}
value += (value_max - value) * PER_FRAME_FADE_TIME / fade_time_max;
glUniform4f(uniform_backlight, value, value, value, value);
renderer_settings.backlight_fade_time -= PER_FRAME_FADE_TIME;
renderer_settings.current_brightness = value;
}
}
static const char* GetSource(GLenum source) {
#define RET(s) \
case GL_DEBUG_SOURCE_##s: \

View File

@@ -70,6 +70,7 @@ private:
void UpdateFramerate();
void CaptureScreenshot();
void UpdateBacklight();
// Loads framebuffer from emulated memory into the display information structure
void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer);
@@ -97,6 +98,7 @@ private:
// Shader uniform location indices
GLuint uniform_modelview_matrix;
GLuint uniform_color_texture;
GLuint uniform_backlight;
// Shader attribute input indices
GLuint attrib_position;
@@ -105,6 +107,10 @@ private:
/// Used for transforming the framebuffer orientation
Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
Common::Rectangle<int> framebuffer_crop_rect;
// Used for backlight transitions
u64 fade_time_max = 0;
f32 value_max = 0;
};
} // namespace OpenGL

View File

@@ -1682,10 +1682,13 @@ public:
switch (index) {
case Tegra::Shader::Pred::NeverExecute:
target = decomp.v_false;
break;
case Tegra::Shader::Pred::UnusedIndex:
target = decomp.v_true;
break;
default:
target = decomp.predicates.at(index);
break;
}
} else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) {
target = decomp.internal_flags.at(static_cast<u32>(flag->GetFlag()));
@@ -1701,6 +1704,13 @@ public:
return expr.value ? decomp.v_true : decomp.v_false;
}
Id operator()(const ExprGprEqual& expr) {
const Id target = decomp.Constant(decomp.t_uint, expr.value);
const Id gpr = decomp.BitcastTo<Type::Uint>(
decomp.Emit(decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr))));
return decomp.Emit(decomp.OpLogicalEqual(decomp.t_uint, gpr, target));
}
Id Visit(const Expr& node) {
return std::visit(*this, *node);
}

View File

@@ -228,6 +228,10 @@ public:
inner += expr.value ? "true" : "false";
}
void operator()(const ExprGprEqual& expr) {
inner += "( gpr_" + std::to_string(expr.gpr) + " == " + std::to_string(expr.value) + ')';
}
const std::string& GetResult() const {
return inner;
}

Some files were not shown because too many files have changed in this diff Show More