Compare commits

..

118 Commits

Author SHA1 Message Date
bunnei
35e1118766 gl_rasterizer_cache: Mark surface copy destinations as modified. 2019-02-06 21:54:25 -05:00
bunnei
dd1aab5446 gl_rasterizer: Implement a more accurate fermi 2D copy.
- This is a blit, use the blit registers.
2019-02-06 21:54:21 -05:00
bunnei
ca482997fe Merge pull request #2091 from FearlessTobi/port-4603
Port citra-emu/citra#4603: "gdbstub: only let Execute breakpoints write/restore BKPT opcodes into target memory"
2019-02-06 21:51:46 -05:00
bunnei
e09f1c92fb Merge pull request #2021 from ReinUsesLisp/disk-cache
gl_shader_cache: Disk based shader cache
2019-02-06 21:47:20 -05:00
ReinUsesLisp
dfd14618f7 cmake: Fix title bar issue 2019-02-06 22:23:41 -03:00
Frederic L
d0ac624403 gl_shader_disk_cache: Check LZ4 size limit
Co-Authored-By: ReinUsesLisp <reinuseslisp@airmail.cc>
2019-02-06 22:23:41 -03:00
Frederic L
9f0b247cf6 gl_shader_disk_cache: Consider compressed size zero as an error
Co-Authored-By: ReinUsesLisp <reinuseslisp@airmail.cc>
2019-02-06 22:23:41 -03:00
Frederic L
8ff2ce5207 cmake: Use CMAKE_COMMAND instead of "cmake"
Co-Authored-By: ReinUsesLisp <reinuseslisp@airmail.cc>
2019-02-06 22:23:41 -03:00
ReinUsesLisp
e6a2245304 gl_shader_disk_cache: Use unordered containers 2019-02-06 22:23:41 -03:00
ReinUsesLisp
e147ed4fc0 gl_shader_cache: Fixup GLSL unique identifiers 2019-02-06 22:23:40 -03:00
Michael
4ffb487251 cmake: Fixup application string
Co-Authored-By: ReinUsesLisp <reinuseslisp@airmail.cc>
2019-02-06 22:23:40 -03:00
ReinUsesLisp
bd928e70ed loading_screen: Unchunk progress bar 2019-02-06 22:23:40 -03:00
ReinUsesLisp
eb73247433 gl_shader_cache: Link loading screen with disk shader cache load 2019-02-06 22:23:40 -03:00
ReinUsesLisp
df0f31f44e gl_shader_cache: Set GL_PROGRAM_SEPARABLE to dumped shaders
i965 (and probably all mesa drivers) require GL_PROGRAM_SEPARABLE when using
glProgramBinary. This is probably required by the standard but it's ignored by
permisive proprietary drivers.
2019-02-06 22:23:40 -03:00
ReinUsesLisp
7fefec585c gl_shader_disk_cache: Pass core system as argument and guard against games without title ids 2019-02-06 22:23:40 -03:00
ReinUsesLisp
2bc6a699dc gl_shader_disk_cache: Guard reads and writes against failure 2019-02-06 22:23:40 -03:00
ReinUsesLisp
750abcc23d gl_shader_disk_cache: Address miscellaneous feedback 2019-02-06 22:23:40 -03:00
ReinUsesLisp
8ee3666a3c gl_shader_disk_cache: Pass return values returning instead of by parameters 2019-02-06 22:23:40 -03:00
ReinUsesLisp
ed956569a4 gl_shader_disk_cache: Compress program binaries using LZ4 2019-02-06 22:23:39 -03:00
ReinUsesLisp
f087639e4a gl_shader_disk_cache: Compress GLSL code using LZ4 2019-02-06 22:23:39 -03:00
ReinUsesLisp
cfb20c4c9d gl_shader_disk_cache: Save GLSL and entries into the precompiled file 2019-02-06 22:23:39 -03:00
ReinUsesLisp
e78da8dc1f settings: Hide shader cache behind a setting 2019-02-06 22:20:57 -03:00
ReinUsesLisp
be4641c43f gl_shader_disk_cache: Invalidate shader cache changes with CMake hash 2019-02-06 22:20:57 -03:00
ReinUsesLisp
a3703f5767 gl_shader_cache: Refactor to support disk shader cache 2019-02-06 22:20:57 -03:00
ReinUsesLisp
4039086226 gl_shader_disk_cache: Add transferable cache invalidation 2019-02-06 22:20:57 -03:00
ReinUsesLisp
a1faed9950 gl_shader_disk_cache: Add precompiled load 2019-02-06 22:20:57 -03:00
ReinUsesLisp
57fb15d2a3 gl_shader_disk_cache: Add precompiled save 2019-02-06 22:20:57 -03:00
ReinUsesLisp
3435cd8d5e gl_shader_disk_cache: Add transferable load 2019-02-06 22:20:57 -03:00
ReinUsesLisp
b1efceec89 gl_shader_disk_cache: Add transferable stores 2019-02-06 22:20:57 -03:00
ReinUsesLisp
98be5a4928 gl_shader_disk_cache: Add ShaderDiskCacheOpenGL class and helpers 2019-02-06 22:20:57 -03:00
ReinUsesLisp
145c3ac89e gl_shader_disk_cache: Add file and move BaseBindings declaration 2019-02-06 22:20:57 -03:00
ReinUsesLisp
c2c5260fd7 gl_shader_decompiler: Remove name entries 2019-02-06 22:20:57 -03:00
ReinUsesLisp
8b11368671 gl_shader_util: Add parameter to handle retrievable programs 2019-02-06 22:20:57 -03:00
ReinUsesLisp
0ed5d728ca rasterizer_interface: Add disk cache entry for the rasterizer 2019-02-06 22:20:57 -03:00
ReinUsesLisp
84412591c9 file_util: Add shader directory 2019-02-06 22:20:57 -03:00
ReinUsesLisp
049050856f shader_decode: Implement LDG and basic cbuf tracking 2019-02-06 22:20:57 -03:00
bunnei
10ab714fe0 Merge pull request #2042 from ReinUsesLisp/nouveau-tex
maxwell_3d: Allow texture handles with TIC id zero
2019-02-06 20:19:20 -05:00
bunnei
40ac058557 Merge pull request #2071 from ReinUsesLisp/dsa-texture
gl_rasterizer: Use DSA for textures and move swizzling to texture state
2019-02-06 20:17:59 -05:00
Dimitri ALBORA
8b800369ea gdbstub: only let Execute breakpoints write/restore BKPT opcodes into target memory 2019-02-06 19:07:35 +01:00
bunnei
c357d8f6f7 Merge pull request #2057 from FearlessTobi/port-4586
Port citra-emu/citra#4586: "Use QPixmap/QIcon for background color selection button"
2019-02-06 12:37:57 -05:00
bunnei
b34ae2235d Merge pull request #2086 from FearlessTobi/port-4583
Port citra-emu/citra#4583: "citra_qt: Fix saving screenshot when no file extension is provided"
2019-02-06 12:33:35 -05:00
bunnei
40cd299f01 Merge pull request #2087 from lioncash/const
service/nvflinger, service/vi: Improve error case handling
2019-02-06 12:33:13 -05:00
bunnei
67c1f31251 Merge pull request #2088 from jroweboy/h
QT: Fix the loading screen 'H' switch logo to not glitch out
2019-02-05 21:06:39 -05:00
James Rowe
c82b0afb69 QT: Fix the loading screen 'H' switch logo to not glitch out 2019-02-05 18:24:15 -07:00
Lioncash
ef073ff117 service/nvflinger,service/vi: Handle failure cases with exposed API
Converts many of the Find* functions to return a std::optional<T> as
opposed to returning the raw return values directly. This allows
removing a few assertions and handles error cases like the service
itself does.
2019-02-05 18:03:28 -05:00
bunnei
7aa7d8f4ff Merge pull request #2085 from ReinUsesLisp/cube-minus-one
video_core/texture: Fix BitField size for depth_minus_one
2019-02-05 17:15:26 -05:00
xperia64
f598490b57 Fix crash when no files are selected 2019-02-05 22:40:23 +01:00
xperia64
284536a626 Add file extension to screenshot filename if not provided 2019-02-05 22:31:37 +01:00
Lioncash
7320c667df service/nvflinger: Mark FindVsyncEvent() as a const member function
This member function doesn't actually modify instance state, so it can
be marked as a const member function.
2019-02-05 15:57:29 -05:00
Lioncash
3c02cdcc57 service/nvflinger: Rename GetVsyncEvent() to FindVsyncEvent()
This was missed within #2075. Renames the member function to make it
consistent with the rest of the Find* functions.
2019-02-05 15:55:18 -05:00
bunnei
72c70d6808 Merge pull request #2081 from ReinUsesLisp/lmem-64
shader_ir/memory: Add LD_L 64 bits loads
2019-02-05 09:17:48 -05:00
ReinUsesLisp
b5e685b297 video_core/texture: Fix BitField size for depth_minus_one 2019-02-05 04:32:06 -03:00
bunnei
bb4549a73d Merge pull request #2082 from FernandoS27/txq-stl
Fix TXQ not using the component mask.
2019-02-04 20:22:32 -05:00
Mat M
a568cd805b Update src/video_core/engines/shader_bytecode.h
Co-Authored-By: FernandoS27 <fsahmkow27@gmail.com>
2019-02-03 21:27:26 -04:00
Fernando Sahmkow
0306c50339 Fix TXQ not using the component mask. 2019-02-03 18:17:18 -04:00
ReinUsesLisp
dfa7be5ddf shader_ir/memory: Add ST_L 64 and 128 bits stores 2019-02-03 19:08:10 -03:00
ReinUsesLisp
2bdbb90af7 video_core: Assert on invalid GPU to CPU address queries 2019-02-03 04:58:40 -03:00
ReinUsesLisp
04e68e9738 maxwell_3d: Allow sampler handles with TSC id zero 2019-02-03 04:58:40 -03:00
ReinUsesLisp
390721a561 maxwell_3d: Allow texture handles with TIC id zero
Also remove "enabled" field from Tegra::Texture::FullTextureInfo because
it would become unused.
2019-02-03 04:58:24 -03:00
ReinUsesLisp
e01a9de35f memory_manager: Check for reserved page status 2019-02-03 04:58:24 -03:00
ReinUsesLisp
f61c1ed246 shader_ir/memory: Add LD_L 128 bits loads 2019-02-03 00:35:34 -03:00
ReinUsesLisp
9feb68085d shader_bytecode: Rename BytesN enums to BitsN 2019-02-03 00:25:40 -03:00
ReinUsesLisp
0be835132c shader_ir/memory: Add LD_L 64 bits loads 2019-02-03 00:25:40 -03:00
bunnei
eceab45dac Merge pull request #2074 from ReinUsesLisp/shader-ir-unify-offset
shader_ir: Unify constant buffer offset values
2019-02-01 13:24:04 -05:00
bunnei
4076d8fe3e Merge pull request #2073 from lioncash/opus
hwopus: Implement DecodeInterleaved (the newest variant)
2019-02-01 13:02:16 -05:00
bunnei
2d226ff8ac Merge pull request #2067 from ReinUsesLisp/workaround-fb
gl_rasterizer: Workaround invalid zeta clears
2019-02-01 12:50:09 -05:00
bunnei
11e7c1244c Merge pull request #2078 from lioncash/timer
kernel: Remove the Timer class
2019-02-01 12:49:16 -05:00
bunnei
3a6eef27a2 Merge pull request #2079 from ReinUsesLisp/remove-fill
video_core: Remove unused Fill related code
2019-02-01 12:48:38 -05:00
ReinUsesLisp
26f8a700a7 rasterizer_interface: Remove unused AccelerateFill operation 2019-02-01 03:02:22 -03:00
ReinUsesLisp
13222f94c0 video_core: Remove unused Fill surface type 2019-02-01 02:57:47 -03:00
Lioncash
414cc1eb1f kernel: Remove the Timer class
A holdover from citra, the Horizon kernel on the switch has no
prominent kernel object that functions as a timer. At least not
to the degree of sophistication that this class provided.

As such, this can be removed entirely. This class also wasn't used at
all in any meaningful way within the core, so this was just code sitting
around doing nothing. This also allows removing a few things from the
main KernelCore class that allows it to use slightly less resources
overall (though very minor and not anything really noticeable).
2019-01-31 23:05:15 -05:00
bunnei
b0b027d2d0 Merge pull request #2072 from lioncash/service
service: Update function tables
2019-01-31 15:19:44 -05:00
bunnei
db21ac2627 Merge pull request #2077 from lioncash/virt
kernel/wait_object: Devirtualize functions related to manipulating the thread list directly
2019-01-31 15:19:02 -05:00
bunnei
d6f5f5cafa Merge pull request #2075 from lioncash/find
service/nvflinger: Minor renaming changes
2019-01-31 11:08:36 -05:00
ReinUsesLisp
3e80b08944 gl_rasterizer_cache: Fixup test clause 2019-01-30 19:10:35 -03:00
Mat M
911587fb8d gl_rasterizer_cache: Guard clause swizzle testing
Co-Authored-By: ReinUsesLisp <reinuseslisp@airmail.cc>
2019-01-30 19:10:35 -03:00
ReinUsesLisp
220df45b7d gl_state: Remove texture target tracking 2019-01-30 19:10:35 -03:00
ReinUsesLisp
704744bb72 gl_rasterizer_cache: Move swizzling to textures instead of state 2019-01-30 19:10:35 -03:00
ReinUsesLisp
3bbaa98c78 gl_state: Use DSA and multi bind to update texture bindings 2019-01-30 19:10:11 -03:00
ReinUsesLisp
4b676e7786 gl_rasterizer: Use DSA for textures 2019-01-30 19:10:11 -03:00
Hexagon12
e597665569 Merge pull request #1818 from ccawley2011/patch-1
Add missing environment variables to travis-ci.env
2019-01-30 19:53:34 +02:00
Lioncash
a3cdd773c3 kernel/wait_object: Devirtualize functions related to manipulating the thread list directly
No inheritors of the WaitObject class actually make use of their own
implementations of these functions, so they can be made non-virtual.

It's also kind of sketchy to allow overriding how the threads get added
to the list anyways, given the kernel itself on the actual hardware
doesn't seem to customize based off this.
2019-01-30 12:50:37 -05:00
Lioncash
4596ef5274 kernel/timer: Remove unnecessary WakeupAllWaitingThreads() override
This implementation just calls the base class variant of the function,
so this isn't necessary.
2019-01-30 12:45:00 -05:00
Lioncash
1a302d4d47 kernel/readable_event: Remove unnecessary WakeupAllWaitingThreads() override
This just calls the base variant of the function, so it can be removed.
2019-01-30 12:45:00 -05:00
Hexagon12
35480167b1 Merge pull request #2076 from lioncash/enc
video_core/dma_pusher: Silence C4828 warnings
2019-01-30 19:42:15 +02:00
Lioncash
0b594f3344 video_core/dma_pusher: Silence C4828 warnings
This was previously causing:

warning C4828: The file contains a character starting at offset 0xa33
that is illegal in the current source character set (codepage 65001).

warnings on Windows when compiling yuzu.
2019-01-30 12:36:31 -05:00
bunnei
92b18345a8 Merge pull request #1485 from FernandoS27/render-info
Add more info into textures' object labels
2019-01-30 12:35:56 -05:00
Lioncash
a897feb21e hwopus: Implement DecodeInterleaved
This functions almost identically to DecodeInterleavedWithPerfOld,
however this function also has the ability to reset the decoder context.

This is documented as a potentially desirable thing in the libopus
manual in some circumstances as it says for the OPUS_RESET_STATE ctl:

"This should be called when switching streams in order to prevent the
back to back decoding from giving different result from one at a time
decoding."
2019-01-30 11:35:41 -05:00
Lioncash
ba14fb42e4 service/nvflinger: Make FindBufferQueueId() a const member function
This member function doesn't actually modify instance state, so it can
be const-qualified.
2019-01-30 11:14:08 -05:00
Lioncash
1d11def9c4 service/nvflinger: Rename Get prefix on function to Find
This more accurately describes what the function is actually attempting
to do (it's not a simple trivial getter).
2019-01-30 11:11:32 -05:00
ReinUsesLisp
477d616f7d shader_ir: Unify constant buffer offset values
Constant buffer values on the shader IR were using different offsets if
the access direct or indirect. cbuf34 has a non-multiplied offset while
cbuf36 does. On shader decoding this commit multiplies it by four on
cbuf34 queries.
2019-01-30 02:45:50 -03:00
Lioncash
07b86dc28c hwopus: Deduplicate the decoding code within DecodeInterleavedOld and DecodeInterleavedWithPerfOld
Keeps the logic in one spot for use by both functions.
2019-01-29 22:53:35 -05:00
Lioncash
44f39bfb68 hwopus: Replace std::optional<std::reference_wrapper<u64>> with u64*
This doesn't really offer anything over the use of a direct pointer, so
we can just use that instead.
2019-01-29 22:53:35 -05:00
Lioncash
eb1a3c1f4a hwopus: Mark local variables as const where applicable
Makes non-mutable state more explicit.
2019-01-29 22:53:35 -05:00
Lioncash
06887c80a5 hwopus: Fill in the rest of the unknown service function names
Filled in via information provided by SwitchBrew.
2019-01-29 22:53:34 -05:00
Lioncash
5078106068 service/ns: Update function tables
Updates function tables based off information provided by SwitchBrew
2019-01-29 22:50:26 -05:00
Lioncash
1710847d08 service/ncm: Update function tables
Updates function tables based off information provided by SwitchBrew
2019-01-29 22:50:26 -05:00
Lioncash
0db8918947 service/audio: Update function tables
Updates function tables based off information provided by SwitchBrew.
2019-01-29 22:50:26 -05:00
Lioncash
c3affdd162 service/am/applet_ae: Update function tables
Updates function tables based off information provided by SwitchBrew.
2019-01-29 22:50:26 -05:00
Lioncash
09727a6a97 service/fsp-srv: Update function tables
Updates function tables based off information provided by SwitchBrew.
2019-01-29 22:50:26 -05:00
Lioncash
c6c1c1b45f service/btm: Update function tables
Updates function tables based off information provided by SwitchBrew
2019-01-29 22:50:26 -05:00
Lioncash
0c59e6265f service/btdrv: Update function tables
Updates function tables based off information provided by SwitchBrew.
2019-01-29 22:50:18 -05:00
bunnei
3c3d9afd61 Merge pull request #2070 from ReinUsesLisp/cubearray-view
gl_shader_cache: Fix texture view for cubemaps as cubemap arrays
2019-01-29 22:27:08 -05:00
bunnei
69884d8a8f Merge pull request #2069 from lioncash/vi
service/nvflinger: Add the null display type
2019-01-29 22:25:26 -05:00
bunnei
4c818b60e3 Merge pull request #1987 from ReinUsesLisp/explicit-shader-ldg
gl_shader_cache: Use explicit bindings
2019-01-29 22:15:55 -05:00
ReinUsesLisp
52c326c301 gl_shader_cache: Use explicit bindings 2019-01-30 00:11:02 -03:00
bunnei
52bb524526 Merge pull request #1960 from ReinUsesLisp/shader-ir-ldg
video_core: Implement LDG through heuristics based on IR
2019-01-29 22:02:01 -05:00
ReinUsesLisp
f58a6152fc gl_shader_cache: Fix texture view for cubemaps as cubemap arrays
Cubemaps are considered layered and to create a texture view the texture
mustn't be a layered texture, resulting in cubemaps being bound as
cubemap arrays. To fix this issue this commit introduces an extra
surface parameter called "is_array" and uses this to query for texture
view creation.

Now that texture views for cubemaps are actually being created, this
also fixes the number of layers created for the texture view (since they
have to be 6 to create a texture view of cubemaps).
2019-01-29 23:49:02 -03:00
ReinUsesLisp
07692230ca gl_rasterizer: Workaround invalid zeta clears
Some games (like Xenoblade Chronicles 2) clear both depth and stencil
buffers while there's a depth-only texture attached (e.g. D16 Unorm).
This commit reads the zeta format of the bound surface on
ConfigureFramebuffers and returns if depth and/or stencil attachments
were set. This is ignored on DrawArrays but on Clear it's used to just
clear those attachments, bypassing an OpenGL error.
2019-01-29 23:47:33 -03:00
Lioncash
49a3571e76 service/psc: Update function tables
Updates the function tables based off information on SwitchBrew. Gets
rid of a swath of unknown names.
2019-01-29 21:16:24 -05:00
Lioncash
7e92497402 nvflinger: Add the Null display
In addition to the default, external, EDID, and internal displays,
there's also a null display provided as well, which as the name
suggests, does nothing but discard all commands given to it. This is
provided for completeness.
2019-01-29 21:13:33 -05:00
Lioncash
679e63550a nvflinger: Change log message in OpenDisplay to be a debug log instead of a warning
Opening a display isn't really a thing to warn about. It's an expected
thing, so this can be a debug log. This also alters the string to
indicate the display name better.

Opening "Default" display reads a little nicer compared to Opening
display Default.
2019-01-29 21:13:33 -05:00
Lioncash
3d81a8efd5 nvflinger: Remove unnecessary header inclusions 2019-01-29 21:13:33 -05:00
Lioncash
d9f9bb7552 nvflinger: Mark locals const where applicable
Makes non-mutable state more explicit.
2019-01-29 21:13:33 -05:00
Lioncash
f45c25aaba nvflinger: Use a std::array for the available displays instead of std::vector
The built-in set of displays is fixed, so we can utilize an array
instead of a vector here.
2019-01-29 21:13:33 -05:00
xperia64
32eb080e02 Use QPixmap/QIcon for background color selection button 2019-01-26 15:08:54 +01:00
FernandoS27
7b9c982d29 Add more info into textures' object labels 2018-12-09 17:22:29 -04:00
Cameron Cawley
e0b0947437 Add missing environment variables to travis-ci.env 2018-11-28 15:12:03 +00:00
105 changed files with 2972 additions and 1450 deletions

View File

@@ -6,6 +6,8 @@ TRAVIS_BRANCH
TRAVIS_BUILD_ID
TRAVIS_BUILD_NUMBER
TRAVIS_COMMIT
TRAVIS_COMMIT_RANGE
TRAVIS_EVENT_TYPE
TRAVIS_JOB_ID
TRAVIS_JOB_NUMBER
TRAVIS_REPO_SLUG

View File

@@ -419,19 +419,6 @@ function(create_target_directory_groups target_name)
endforeach()
endfunction()
# Gets a UTC timstamp and sets the provided variable to it
function(get_timestamp _var)
string(TIMESTAMP timestamp UTC)
set(${_var} "${timestamp}" PARENT_SCOPE)
endfunction()
# generate git/build information
include(GetGitRevisionDescription)
get_git_head_revision(GIT_REF_SPEC GIT_REV)
git_describe(GIT_DESC --always --long --dirty)
git_branch_name(GIT_BRANCH)
get_timestamp(BUILD_DATE)
enable_testing()
add_subdirectory(externals)
add_subdirectory(src)

View File

@@ -0,0 +1,94 @@
# Gets a UTC timstamp and sets the provided variable to it
function(get_timestamp _var)
string(TIMESTAMP timestamp UTC)
set(${_var} "${timestamp}" PARENT_SCOPE)
endfunction()
list(APPEND CMAKE_MODULE_PATH "${SRC_DIR}/externals/cmake-modules")
# generate git/build information
include(GetGitRevisionDescription)
get_git_head_revision(GIT_REF_SPEC GIT_REV)
git_describe(GIT_DESC --always --long --dirty)
git_branch_name(GIT_BRANCH)
get_timestamp(BUILD_DATE)
# Generate cpp with Git revision from template
# Also if this is a CI build, add the build name (ie: Nightly, Canary) to the scm_rev file as well
set(REPO_NAME "")
set(BUILD_VERSION "0")
if (BUILD_REPOSITORY)
# regex capture the string nightly or canary into CMAKE_MATCH_1
string(REGEX MATCH "yuzu-emu/yuzu-?(.*)" OUTVAR ${BUILD_REPOSITORY})
if (${CMAKE_MATCH_COUNT} GREATER 0)
# capitalize the first letter of each word in the repo name.
string(REPLACE "-" ";" REPO_NAME_LIST ${CMAKE_MATCH_1})
foreach(WORD ${REPO_NAME_LIST})
string(SUBSTRING ${WORD} 0 1 FIRST_LETTER)
string(SUBSTRING ${WORD} 1 -1 REMAINDER)
string(TOUPPER ${FIRST_LETTER} FIRST_LETTER)
set(REPO_NAME "${REPO_NAME}${FIRST_LETTER}${REMAINDER}")
endforeach()
if (BUILD_TAG)
string(REGEX MATCH "${CMAKE_MATCH_1}-([0-9]+)" OUTVAR ${BUILD_TAG})
if (${CMAKE_MATCH_COUNT} GREATER 0)
set(BUILD_VERSION ${CMAKE_MATCH_1})
endif()
if (BUILD_VERSION)
# This leaves a trailing space on the last word, but we actually want that
# because of how it's styled in the title bar.
set(BUILD_FULLNAME "${REPO_NAME} ${BUILD_VERSION} ")
else()
set(BUILD_FULLNAME "")
endif()
endif()
endif()
endif()
# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
set(VIDEO_CORE "${SRC_DIR}/src/video_core")
set(HASH_FILES
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
"${VIDEO_CORE}/shader/decode/arithmetic.cpp"
"${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
"${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
"${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp"
"${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp"
"${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp"
"${VIDEO_CORE}/shader/decode/bfe.cpp"
"${VIDEO_CORE}/shader/decode/bfi.cpp"
"${VIDEO_CORE}/shader/decode/conversion.cpp"
"${VIDEO_CORE}/shader/decode/ffma.cpp"
"${VIDEO_CORE}/shader/decode/float_set.cpp"
"${VIDEO_CORE}/shader/decode/float_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/half_set.cpp"
"${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/hfma2.cpp"
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/memory.cpp"
"${VIDEO_CORE}/shader/decode/other.cpp"
"${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
"${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/shift.cpp"
"${VIDEO_CORE}/shader/decode/video.cpp"
"${VIDEO_CORE}/shader/decode/xmad.cpp"
"${VIDEO_CORE}/shader/decode.cpp"
"${VIDEO_CORE}/shader/shader_ir.cpp"
"${VIDEO_CORE}/shader/shader_ir.h"
"${VIDEO_CORE}/shader/track.cpp"
)
set(COMBINED "")
foreach (F IN LISTS HASH_FILES)
file(READ ${F} TMP)
set(COMBINED "${COMBINED}${TMP}")
endforeach()
string(MD5 SHADER_CACHE_VERSION "${COMBINED}")
configure_file("${SRC_DIR}/src/common/scm_rev.cpp.in" "scm_rev.cpp" @ONLY)

View File

@@ -1,42 +1,69 @@
# Generate cpp with Git revision from template
# Also if this is a CI build, add the build name (ie: Nightly, Canary) to the scm_rev file as well
set(REPO_NAME "")
set(BUILD_VERSION "0")
if ($ENV{CI})
if ($ENV{TRAVIS})
# Add a custom command to generate a new shader_cache_version hash when any of the following files change
# NOTE: This is an approximation of what files affect shader generation, its possible something else
# could affect the result, but much more unlikely than the following files. Keeping a list of files
# like this allows for much better caching since it doesn't force the user to recompile binary shaders every update
set(VIDEO_CORE "${CMAKE_SOURCE_DIR}/src/video_core")
if (DEFINED ENV{CI})
if (DEFINED ENV{TRAVIS})
set(BUILD_REPOSITORY $ENV{TRAVIS_REPO_SLUG})
set(BUILD_TAG $ENV{TRAVIS_TAG})
elseif($ENV{APPVEYOR})
elseif(DEFINED ENV{APPVEYOR})
set(BUILD_REPOSITORY $ENV{APPVEYOR_REPO_NAME})
set(BUILD_TAG $ENV{APPVEYOR_REPO_TAG_NAME})
endif()
# regex capture the string nightly or canary into CMAKE_MATCH_1
string(REGEX MATCH "yuzu-emu/yuzu-?(.*)" OUTVAR ${BUILD_REPOSITORY})
if (${CMAKE_MATCH_COUNT} GREATER 0)
# capitalize the first letter of each word in the repo name.
string(REPLACE "-" ";" REPO_NAME_LIST ${CMAKE_MATCH_1})
foreach(WORD ${REPO_NAME_LIST})
string(SUBSTRING ${WORD} 0 1 FIRST_LETTER)
string(SUBSTRING ${WORD} 1 -1 REMAINDER)
string(TOUPPER ${FIRST_LETTER} FIRST_LETTER)
set(REPO_NAME "${REPO_NAME}${FIRST_LETTER}${REMAINDER}")
endforeach()
if (BUILD_TAG)
string(REGEX MATCH "${CMAKE_MATCH_1}-([0-9]+)" OUTVAR ${BUILD_TAG})
if (${CMAKE_MATCH_COUNT} GREATER 0)
set(BUILD_VERSION ${CMAKE_MATCH_1})
endif()
if (BUILD_VERSION)
# This leaves a trailing space on the last word, but we actually want that
# because of how it's styled in the title bar.
set(BUILD_FULLNAME "${REPO_NAME} ${BUILD_VERSION} ")
else()
set(BUILD_FULLNAME "")
endif()
endif()
endif()
endif()
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp" @ONLY)
add_custom_command(OUTPUT scm_rev.cpp
COMMAND ${CMAKE_COMMAND}
-DSRC_DIR="${CMAKE_SOURCE_DIR}"
-DBUILD_REPOSITORY="${BUILD_REPOSITORY}"
-DBUILD_TAG="${BUILD_TAG}"
-P "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake"
DEPENDS
# WARNING! It was too much work to try and make a common location for this list,
# so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
"${VIDEO_CORE}/shader/decode/arithmetic.cpp"
"${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
"${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
"${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp"
"${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp"
"${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp"
"${VIDEO_CORE}/shader/decode/bfe.cpp"
"${VIDEO_CORE}/shader/decode/bfi.cpp"
"${VIDEO_CORE}/shader/decode/conversion.cpp"
"${VIDEO_CORE}/shader/decode/ffma.cpp"
"${VIDEO_CORE}/shader/decode/float_set.cpp"
"${VIDEO_CORE}/shader/decode/float_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/half_set.cpp"
"${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/hfma2.cpp"
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/memory.cpp"
"${VIDEO_CORE}/shader/decode/other.cpp"
"${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
"${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/shift.cpp"
"${VIDEO_CORE}/shader/decode/video.cpp"
"${VIDEO_CORE}/shader/decode/xmad.cpp"
"${VIDEO_CORE}/shader/decode.cpp"
"${VIDEO_CORE}/shader/shader_ir.cpp"
"${VIDEO_CORE}/shader/shader_ir.h"
"${VIDEO_CORE}/shader/track.cpp"
# and also check that the scm_rev files haven't changed
"${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in"
"${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h"
# technically we should regenerate if the git version changed, but its not worth the effort imo
"${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake"
)
add_library(common STATIC
alignment.h

View File

@@ -35,6 +35,7 @@
#define KEYS_DIR "keys"
#define LOAD_DIR "load"
#define DUMP_DIR "dump"
#define SHADER_DIR "shader"
#define LOG_DIR "log"
// Filenames

View File

@@ -710,6 +710,7 @@ const std::string& GetUserPath(UserPath path, const std::string& new_path) {
paths.emplace(UserPath::NANDDir, user_path + NAND_DIR DIR_SEP);
paths.emplace(UserPath::LoadDir, user_path + LOAD_DIR DIR_SEP);
paths.emplace(UserPath::DumpDir, user_path + DUMP_DIR DIR_SEP);
paths.emplace(UserPath::ShaderDir, user_path + SHADER_DIR DIR_SEP);
paths.emplace(UserPath::SysDataDir, user_path + SYSDATA_DIR DIR_SEP);
paths.emplace(UserPath::KeysDir, user_path + KEYS_DIR DIR_SEP);
// TODO: Put the logs in a better location for each OS

View File

@@ -31,6 +31,7 @@ enum class UserPath {
SDMCDir,
LoadDir,
DumpDir,
ShaderDir,
SysDataDir,
UserDir,
};

View File

@@ -11,6 +11,7 @@
#define BUILD_DATE "@BUILD_DATE@"
#define BUILD_FULLNAME "@BUILD_FULLNAME@"
#define BUILD_VERSION "@BUILD_VERSION@"
#define SHADER_CACHE_VERSION "@SHADER_CACHE_VERSION@"
namespace Common {
@@ -21,6 +22,7 @@ const char g_build_name[] = BUILD_NAME;
const char g_build_date[] = BUILD_DATE;
const char g_build_fullname[] = BUILD_FULLNAME;
const char g_build_version[] = BUILD_VERSION;
const char g_shader_cache_version[] = SHADER_CACHE_VERSION;
} // namespace

View File

@@ -13,5 +13,6 @@ extern const char g_build_name[];
extern const char g_build_date[];
extern const char g_build_fullname[];
extern const char g_build_version[];
extern const char g_shader_cache_version[];
} // namespace Common

View File

@@ -140,8 +140,6 @@ add_library(core STATIC
hle/kernel/svc_wrap.h
hle/kernel/thread.cpp
hle/kernel/thread.h
hle/kernel/timer.cpp
hle/kernel/timer.h
hle/kernel/vm_manager.cpp
hle/kernel/vm_manager.h
hle/kernel/wait_object.cpp

View File

@@ -123,7 +123,7 @@ struct System::Impl {
Service::Init(service_manager, *virtual_filesystem);
GDBStub::Init();
renderer = VideoCore::CreateRenderer(emu_window);
renderer = VideoCore::CreateRenderer(emu_window, system);
if (!renderer->Init()) {
return ResultStatus::ErrorVideoCore;
}
@@ -175,6 +175,7 @@ struct System::Impl {
return static_cast<ResultStatus>(static_cast<u32>(ResultStatus::ErrorLoader) +
static_cast<u32>(load_result));
}
status = ResultStatus::Success;
return status;
}

View File

@@ -507,8 +507,11 @@ static void RemoveBreakpoint(BreakpointType type, VAddr addr) {
LOG_DEBUG(Debug_GDBStub, "gdb: removed a breakpoint: {:016X} bytes at {:016X} of type {}",
bp->second.len, bp->second.addr, static_cast<int>(type));
Memory::WriteBlock(bp->second.addr, bp->second.inst.data(), bp->second.inst.size());
Core::System::GetInstance().InvalidateCpuInstructionCaches();
if (type == BreakpointType::Execute) {
Memory::WriteBlock(bp->second.addr, bp->second.inst.data(), bp->second.inst.size());
Core::System::GetInstance().InvalidateCpuInstructionCaches();
}
p.erase(addr);
}
@@ -1057,9 +1060,12 @@ static bool CommitBreakpoint(BreakpointType type, VAddr addr, u64 len) {
breakpoint.addr = addr;
breakpoint.len = len;
Memory::ReadBlock(addr, breakpoint.inst.data(), breakpoint.inst.size());
static constexpr std::array<u8, 4> btrap{0x00, 0x7d, 0x20, 0xd4};
Memory::WriteBlock(addr, btrap.data(), btrap.size());
Core::System::GetInstance().InvalidateCpuInstructionCaches();
if (type == BreakpointType::Execute) {
Memory::WriteBlock(addr, btrap.data(), btrap.size());
Core::System::GetInstance().InvalidateCpuInstructionCaches();
}
p.insert({addr, breakpoint});
LOG_DEBUG(Debug_GDBStub, "gdb: added {} breakpoint: {:016X} bytes at {:016X}",

View File

@@ -18,7 +18,6 @@
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/kernel/timer.h"
#include "core/hle/lock.h"
#include "core/hle/result.h"
@@ -86,27 +85,12 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
}
}
/// The timer callback event, called when a timer is fired
static void TimerCallback(u64 timer_handle, int cycles_late) {
const auto proper_handle = static_cast<Handle>(timer_handle);
const auto& system = Core::System::GetInstance();
SharedPtr<Timer> timer = system.Kernel().RetrieveTimerFromCallbackHandleTable(proper_handle);
if (timer == nullptr) {
LOG_CRITICAL(Kernel, "Callback fired for invalid timer {:016X}", timer_handle);
return;
}
timer->Signal(cycles_late);
}
struct KernelCore::Impl {
void Initialize(KernelCore& kernel) {
Shutdown();
InitializeSystemResourceLimit(kernel);
InitializeThreads();
InitializeTimers();
}
void Shutdown() {
@@ -122,9 +106,6 @@ struct KernelCore::Impl {
thread_wakeup_callback_handle_table.Clear();
thread_wakeup_event_type = nullptr;
timer_callback_handle_table.Clear();
timer_callback_event_type = nullptr;
named_ports.clear();
}
@@ -146,11 +127,6 @@ struct KernelCore::Impl {
CoreTiming::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
}
void InitializeTimers() {
timer_callback_handle_table.Clear();
timer_callback_event_type = CoreTiming::RegisterEvent("TimerCallback", TimerCallback);
}
std::atomic<u32> next_object_id{0};
std::atomic<u64> next_process_id{Process::ProcessIDMin};
std::atomic<u64> next_thread_id{1};
@@ -161,12 +137,6 @@ struct KernelCore::Impl {
SharedPtr<ResourceLimit> system_resource_limit;
/// The event type of the generic timer callback event
CoreTiming::EventType* timer_callback_event_type = nullptr;
// TODO(yuriks): This can be removed if Timer objects are explicitly pooled in the future,
// allowing us to simply use a pool index or similar.
Kernel::HandleTable timer_callback_handle_table;
CoreTiming::EventType* thread_wakeup_event_type = nullptr;
// TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future,
// allowing us to simply use a pool index or similar.
@@ -198,10 +168,6 @@ SharedPtr<Thread> KernelCore::RetrieveThreadFromWakeupCallbackHandleTable(Handle
return impl->thread_wakeup_callback_handle_table.Get<Thread>(handle);
}
SharedPtr<Timer> KernelCore::RetrieveTimerFromCallbackHandleTable(Handle handle) const {
return impl->timer_callback_handle_table.Get<Timer>(handle);
}
void KernelCore::AppendNewProcess(SharedPtr<Process> process) {
impl->process_list.push_back(std::move(process));
}
@@ -247,18 +213,10 @@ u64 KernelCore::CreateNewProcessID() {
return impl->next_process_id++;
}
ResultVal<Handle> KernelCore::CreateTimerCallbackHandle(const SharedPtr<Timer>& timer) {
return impl->timer_callback_handle_table.Create(timer);
}
CoreTiming::EventType* KernelCore::ThreadWakeupCallbackEventType() const {
return impl->thread_wakeup_event_type;
}
CoreTiming::EventType* KernelCore::TimerCallbackEventType() const {
return impl->timer_callback_event_type;
}
Kernel::HandleTable& KernelCore::ThreadWakeupCallbackHandleTable() {
return impl->thread_wakeup_callback_handle_table;
}

View File

@@ -22,7 +22,6 @@ class HandleTable;
class Process;
class ResourceLimit;
class Thread;
class Timer;
/// Represents a single instance of the kernel.
class KernelCore {
@@ -51,9 +50,6 @@ public:
/// Retrieves a shared pointer to a Thread instance within the thread wakeup handle table.
SharedPtr<Thread> RetrieveThreadFromWakeupCallbackHandleTable(Handle handle) const;
/// Retrieves a shared pointer to a Timer instance within the timer callback handle table.
SharedPtr<Timer> RetrieveTimerFromCallbackHandleTable(Handle handle) const;
/// Adds the given shared pointer to an internal list of active processes.
void AppendNewProcess(SharedPtr<Process> process);
@@ -82,7 +78,6 @@ private:
friend class Object;
friend class Process;
friend class Thread;
friend class Timer;
/// Creates a new object ID, incrementing the internal object ID counter.
u32 CreateNewObjectID();
@@ -93,15 +88,9 @@ private:
/// Creates a new thread ID, incrementing the internal thread ID counter.
u64 CreateNewThreadID();
/// Creates a timer callback handle for the given timer.
ResultVal<Handle> CreateTimerCallbackHandle(const SharedPtr<Timer>& timer);
/// Retrieves the event type used for thread wakeup callbacks.
CoreTiming::EventType* ThreadWakeupCallbackEventType() const;
/// Retrieves the event type used for timer callbacks.
CoreTiming::EventType* TimerCallbackEventType() const;
/// Provides a reference to the thread wakeup callback handle table.
Kernel::HandleTable& ThreadWakeupCallbackHandleTable();

View File

@@ -16,7 +16,6 @@ bool Object::IsWaitable() const {
case HandleType::ReadableEvent:
case HandleType::Thread:
case HandleType::Process:
case HandleType::Timer:
case HandleType::ServerPort:
case HandleType::ServerSession:
return true;

View File

@@ -25,7 +25,6 @@ enum class HandleType : u32 {
Thread,
Process,
AddressArbiter,
Timer,
ResourceLimit,
ClientPort,
ServerPort,

View File

@@ -44,8 +44,4 @@ ResultCode ReadableEvent::Reset() {
return RESULT_SUCCESS;
}
void ReadableEvent::WakeupAllWaitingThreads() {
WaitObject::WakeupAllWaitingThreads();
}
} // namespace Kernel

View File

@@ -39,8 +39,6 @@ public:
bool ShouldWait(Thread* thread) const override;
void Acquire(Thread* thread) override;
void WakeupAllWaitingThreads() override;
/// Unconditionally clears the readable event's state.
void Clear();

View File

@@ -1,88 +0,0 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/kernel/timer.h"
namespace Kernel {
Timer::Timer(KernelCore& kernel) : WaitObject{kernel} {}
Timer::~Timer() = default;
SharedPtr<Timer> Timer::Create(KernelCore& kernel, ResetType reset_type, std::string name) {
SharedPtr<Timer> timer(new Timer(kernel));
timer->reset_type = reset_type;
timer->signaled = false;
timer->name = std::move(name);
timer->initial_delay = 0;
timer->interval_delay = 0;
timer->callback_handle = kernel.CreateTimerCallbackHandle(timer).Unwrap();
return timer;
}
bool Timer::ShouldWait(Thread* thread) const {
return !signaled;
}
void Timer::Acquire(Thread* thread) {
ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
if (reset_type == ResetType::OneShot)
signaled = false;
}
void Timer::Set(s64 initial, s64 interval) {
// Ensure we get rid of any previous scheduled event
Cancel();
initial_delay = initial;
interval_delay = interval;
if (initial == 0) {
// Immediately invoke the callback
Signal(0);
} else {
CoreTiming::ScheduleEvent(CoreTiming::nsToCycles(initial), kernel.TimerCallbackEventType(),
callback_handle);
}
}
void Timer::Cancel() {
CoreTiming::UnscheduleEvent(kernel.TimerCallbackEventType(), callback_handle);
}
void Timer::Clear() {
signaled = false;
}
void Timer::WakeupAllWaitingThreads() {
WaitObject::WakeupAllWaitingThreads();
}
void Timer::Signal(int cycles_late) {
LOG_TRACE(Kernel, "Timer {} fired", GetObjectId());
signaled = true;
// Resume all waiting threads
WakeupAllWaitingThreads();
if (interval_delay != 0) {
// Reschedule the timer with the interval delay
CoreTiming::ScheduleEvent(CoreTiming::nsToCycles(interval_delay) - cycles_late,
kernel.TimerCallbackEventType(), callback_handle);
}
}
} // namespace Kernel

View File

@@ -1,90 +0,0 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/wait_object.h"
namespace Kernel {
class KernelCore;
class Timer final : public WaitObject {
public:
/**
* Creates a timer
* @param kernel The kernel instance to create the timer callback handle for.
* @param reset_type ResetType describing how to create the timer
* @param name Optional name of timer
* @return The created Timer
*/
static SharedPtr<Timer> Create(KernelCore& kernel, ResetType reset_type,
std::string name = "Unknown");
std::string GetTypeName() const override {
return "Timer";
}
std::string GetName() const override {
return name;
}
static const HandleType HANDLE_TYPE = HandleType::Timer;
HandleType GetHandleType() const override {
return HANDLE_TYPE;
}
ResetType GetResetType() const {
return reset_type;
}
u64 GetInitialDelay() const {
return initial_delay;
}
u64 GetIntervalDelay() const {
return interval_delay;
}
bool ShouldWait(Thread* thread) const override;
void Acquire(Thread* thread) override;
void WakeupAllWaitingThreads() override;
/**
* Starts the timer, with the specified initial delay and interval.
* @param initial Delay until the timer is first fired
* @param interval Delay until the timer is fired after the first time
*/
void Set(s64 initial, s64 interval);
void Cancel();
void Clear();
/**
* Signals the timer, waking up any waiting threads and rescheduling it
* for the next interval.
* This method should not be called from outside the timer callback handler,
* lest multiple callback events get scheduled.
*/
void Signal(int cycles_late);
private:
explicit Timer(KernelCore& kernel);
~Timer() override;
ResetType reset_type; ///< The ResetType of this timer
u64 initial_delay; ///< The delay until the timer fires for the first time
u64 interval_delay; ///< The delay until the timer fires after the first time
bool signaled; ///< Whether the timer has been signaled or not
std::string name; ///< Name of timer (optional)
/// Handle used as userdata to reference this object when inserting into the CoreTiming queue.
Handle callback_handle;
};
} // namespace Kernel

View File

@@ -33,19 +33,19 @@ public:
* Add a thread to wait on this object
* @param thread Pointer to thread to add
*/
virtual void AddWaitingThread(SharedPtr<Thread> thread);
void AddWaitingThread(SharedPtr<Thread> thread);
/**
* Removes a thread from waiting on this object (e.g. if it was resumed already)
* @param thread Pointer to thread to remove
*/
virtual void RemoveWaitingThread(Thread* thread);
void RemoveWaitingThread(Thread* thread);
/**
* Wake up all threads waiting on this object that can be awoken, in priority order,
* and set the synchronization result and output of the thread.
*/
virtual void WakeupAllWaitingThreads();
void WakeupAllWaitingThreads();
/**
* Wakes up a single thread waiting on this object.

View File

@@ -322,14 +322,15 @@ void ISelfController::SetScreenShotImageOrientation(Kernel::HLERequestContext& c
void ISelfController::CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_AM, "(STUBBED) called");
// TODO(Subv): Find out how AM determines the display to use, for now just
// create the layer in the Default display.
u64 display_id = nvflinger->OpenDisplay("Default");
u64 layer_id = nvflinger->CreateLayer(display_id);
const auto display_id = nvflinger->OpenDisplay("Default");
const auto layer_id = nvflinger->CreateLayer(*display_id);
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(RESULT_SUCCESS);
rb.Push(layer_id);
rb.Push(*layer_id);
}
void ISelfController::SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx) {

View File

@@ -249,7 +249,8 @@ AppletAE::AppletAE(std::shared_ptr<NVFlinger::NVFlinger> nvflinger,
{300, nullptr, "OpenOverlayAppletProxy"},
{350, nullptr, "OpenSystemApplicationProxy"},
{400, nullptr, "CreateSelfLibraryAppletCreatorForDevelop"},
{401, nullptr, "GetSystemAppletControllerForDebug"},
{410, nullptr, "GetSystemAppletControllerForDebug"},
{1000, nullptr, "GetDebugFunctions"},
};
// clang-format on

View File

@@ -12,6 +12,7 @@ namespace Service::Audio {
class IAudioIn final : public ServiceFramework<IAudioIn> {
public:
IAudioIn() : ServiceFramework("IAudioIn") {
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "GetAudioInState"},
{1, nullptr, "StartAudioIn"},
@@ -28,16 +29,24 @@ public:
{12, nullptr, "SetAudioInDeviceGain"},
{13, nullptr, "GetAudioInDeviceGain"},
};
// clang-format on
RegisterHandlers(functions);
}
~IAudioIn() = default;
};
AudInU::AudInU() : ServiceFramework("audin:u") {
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "ListAudioIns"}, {1, nullptr, "OpenAudioIn"}, {2, nullptr, "Unknown"},
{3, nullptr, "OpenAudioInAuto"}, {4, nullptr, "ListAudioInsAuto"},
{0, nullptr, "ListAudioIns"},
{1, nullptr, "OpenAudioIn"},
{2, nullptr, "Unknown"},
{3, nullptr, "OpenAudioInAuto"},
{4, nullptr, "ListAudioInsAuto"},
};
// clang-format on
RegisterHandlers(functions);
}

View File

@@ -12,6 +12,7 @@ namespace Service::Audio {
class IFinalOutputRecorder final : public ServiceFramework<IFinalOutputRecorder> {
public:
IFinalOutputRecorder() : ServiceFramework("IFinalOutputRecorder") {
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "GetFinalOutputRecorderState"},
{1, nullptr, "StartFinalOutputRecorder"},
@@ -20,10 +21,13 @@ public:
{4, nullptr, "RegisterBufferEvent"},
{5, nullptr, "GetReleasedFinalOutputRecorderBuffer"},
{6, nullptr, "ContainsFinalOutputRecorderBuffer"},
{7, nullptr, "Unknown"},
{7, nullptr, "GetFinalOutputRecorderBufferEndTime"},
{8, nullptr, "AppendFinalOutputRecorderBufferAuto"},
{9, nullptr, "GetReleasedFinalOutputRecorderBufferAuto"},
{10, nullptr, "FlushFinalOutputRecorderBuffers"},
};
// clang-format on
RegisterHandlers(functions);
}
~IFinalOutputRecorder() = default;

View File

@@ -229,14 +229,16 @@ private:
}; // namespace Audio
AudRenU::AudRenU() : ServiceFramework("audren:u") {
// clang-format off
static const FunctionInfo functions[] = {
{0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"},
{1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"},
{2, &AudRenU::GetAudioDevice, "GetAudioDevice"},
{2, &AudRenU::GetAudioDeviceService, "GetAudioDeviceService"},
{3, nullptr, "OpenAudioRendererAuto"},
{4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo,
"GetAudioDeviceServiceWithRevisionInfo"},
{4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, "GetAudioDeviceServiceWithRevisionInfo"},
};
// clang-format on
RegisterHandlers(functions);
}
@@ -313,7 +315,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", output_sz);
}
void AudRenU::GetAudioDevice(Kernel::HLERequestContext& ctx) {
void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "called");
IPC::ResponseBuilder rb{ctx, 2, 0, 1};

View File

@@ -20,7 +20,7 @@ public:
private:
void OpenAudioRenderer(Kernel::HLERequestContext& ctx);
void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx);
void GetAudioDevice(Kernel::HLERequestContext& ctx);
void GetAudioDeviceService(Kernel::HLERequestContext& ctx);
void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx);
enum class AudioFeatures : u32 {

View File

@@ -5,7 +5,6 @@
#include <chrono>
#include <cstring>
#include <memory>
#include <optional>
#include <vector>
#include <opus.h>
@@ -30,48 +29,66 @@ public:
u32 channel_count)
: ServiceFramework("IHardwareOpusDecoderManager"), decoder(std::move(decoder)),
sample_rate(sample_rate), channel_count(channel_count) {
// clang-format off
static const FunctionInfo functions[] = {
{0, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
{0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
{1, nullptr, "SetContext"},
{2, nullptr, "DecodeInterleavedForMultiStream"},
{2, nullptr, "DecodeInterleavedForMultiStreamOld"},
{3, nullptr, "SetContextForMultiStream"},
{4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerformance,
"DecodeInterleavedWithPerformance"},
{5, nullptr, "Unknown5"},
{6, nullptr, "Unknown6"},
{7, nullptr, "Unknown7"},
{4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
{5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
{6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
{7, nullptr, "DecodeInterleavedForMultiStream"},
};
// clang-format on
RegisterHandlers(functions);
}
private:
/// Describes extra behavior that may be asked of the decoding context.
enum class ExtraBehavior {
/// No extra behavior.
None,
/// Resets the decoder context back to a freshly initialized state.
ResetContext,
};
void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Audio, "called");
DecodeInterleavedHelper(ctx, nullptr, ExtraBehavior::None);
}
void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Audio, "called");
u64 performance = 0;
DecodeInterleavedHelper(ctx, &performance, ExtraBehavior::None);
}
void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Audio, "called");
u32 consumed = 0;
u32 sample_count = 0;
std::vector<opus_int16> samples(ctx.GetWriteBufferSize() / sizeof(opus_int16));
if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples)) {
LOG_ERROR(Audio, "Failed to decode opus data");
IPC::ResponseBuilder rb{ctx, 2};
// TODO(ogniK): Use correct error code
rb.Push(ResultCode(-1));
return;
}
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(RESULT_SUCCESS);
rb.Push<u32>(consumed);
rb.Push<u32>(sample_count);
ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
IPC::RequestParser rp{ctx};
const auto extra_behavior =
rp.Pop<bool>() ? ExtraBehavior::ResetContext : ExtraBehavior::None;
u64 performance = 0;
DecodeInterleavedHelper(ctx, &performance, extra_behavior);
}
void DecodeInterleavedWithPerformance(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Audio, "called");
void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance,
ExtraBehavior extra_behavior) {
u32 consumed = 0;
u32 sample_count = 0;
u64 performance = 0;
std::vector<opus_int16> samples(ctx.GetWriteBufferSize() / sizeof(opus_int16));
if (extra_behavior == ExtraBehavior::ResetContext) {
ResetDecoderContext();
}
if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples,
performance)) {
LOG_ERROR(Audio, "Failed to decode opus data");
@@ -80,25 +97,28 @@ private:
rb.Push(ResultCode(-1));
return;
}
IPC::ResponseBuilder rb{ctx, 6};
const u32 param_size = performance != nullptr ? 6 : 4;
IPC::ResponseBuilder rb{ctx, param_size};
rb.Push(RESULT_SUCCESS);
rb.Push<u32>(consumed);
rb.Push<u32>(sample_count);
rb.Push<u64>(performance);
if (performance) {
rb.Push<u64>(*performance);
}
ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
}
bool Decoder_DecodeInterleaved(
u32& consumed, u32& sample_count, const std::vector<u8>& input,
std::vector<opus_int16>& output,
std::optional<std::reference_wrapper<u64>> performance_time = std::nullopt) {
bool Decoder_DecodeInterleaved(u32& consumed, u32& sample_count, const std::vector<u8>& input,
std::vector<opus_int16>& output, u64* out_performance_time) {
const auto start_time = std::chrono::high_resolution_clock::now();
std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
const std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
if (sizeof(OpusHeader) > input.size()) {
LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}",
sizeof(OpusHeader), input.size());
return false;
}
OpusHeader hdr{};
std::memcpy(&hdr, input.data(), sizeof(OpusHeader));
if (sizeof(OpusHeader) + static_cast<u32>(hdr.sz) > input.size()) {
@@ -106,8 +126,9 @@ private:
sizeof(OpusHeader) + static_cast<u32>(hdr.sz), input.size());
return false;
}
auto frame = input.data() + sizeof(OpusHeader);
auto decoded_sample_count = opus_packet_get_nb_samples(
const auto frame = input.data() + sizeof(OpusHeader);
const auto decoded_sample_count = opus_packet_get_nb_samples(
frame, static_cast<opus_int32>(input.size() - sizeof(OpusHeader)),
static_cast<opus_int32>(sample_rate));
if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) {
@@ -117,8 +138,9 @@ private:
decoded_sample_count * channel_count * sizeof(u16), raw_output_sz);
return false;
}
const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count));
auto out_sample_count =
const auto out_sample_count =
opus_decode(decoder.get(), frame, hdr.sz, output.data(), frame_size, 0);
if (out_sample_count < 0) {
LOG_ERROR(Audio,
@@ -127,16 +149,24 @@ private:
out_sample_count, frame_size, static_cast<u32>(hdr.sz));
return false;
}
const auto end_time = std::chrono::high_resolution_clock::now() - start_time;
sample_count = out_sample_count;
consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz);
if (performance_time.has_value()) {
performance_time->get() =
if (out_performance_time != nullptr) {
*out_performance_time =
std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count();
}
return true;
}
void ResetDecoderContext() {
ASSERT(decoder != nullptr);
opus_decoder_ctl(decoder.get(), OPUS_RESET_STATE);
}
struct OpusHeader {
u32_be sz; // Needs to be BE for some odd reason
INSERT_PADDING_WORDS(1);
@@ -157,6 +187,7 @@ void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto sample_rate = rp.Pop<u32>();
const auto channel_count = rp.Pop<u32>();
LOG_DEBUG(Audio, "called with sample_rate={}, channel_count={}", sample_rate, channel_count);
ASSERT_MSG(sample_rate == 48000 || sample_rate == 24000 || sample_rate == 16000 ||
@@ -174,9 +205,10 @@ void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {
void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
auto sample_rate = rp.Pop<u32>();
auto channel_count = rp.Pop<u32>();
auto buffer_sz = rp.Pop<u32>();
const auto sample_rate = rp.Pop<u32>();
const auto channel_count = rp.Pop<u32>();
const auto buffer_sz = rp.Pop<u32>();
LOG_DEBUG(Audio, "called sample_rate={}, channel_count={}, buffer_size={}", sample_rate,
channel_count, buffer_sz);
@@ -185,8 +217,9 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
"Invalid sample rate");
ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
std::size_t worker_sz = WorkerBufferSize(channel_count);
const std::size_t worker_sz = WorkerBufferSize(channel_count);
ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large");
std::unique_ptr<OpusDecoder, OpusDeleter> decoder{
static_cast<OpusDecoder*>(operator new(worker_sz))};
if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) {

View File

@@ -19,16 +19,16 @@ public:
explicit Bt() : ServiceFramework{"bt"} {
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "Unknown0"},
{1, nullptr, "Unknown1"},
{2, nullptr, "Unknown2"},
{3, nullptr, "Unknown3"},
{4, nullptr, "Unknown4"},
{5, nullptr, "Unknown5"},
{6, nullptr, "Unknown6"},
{7, nullptr, "Unknown7"},
{8, nullptr, "Unknown8"},
{9, &Bt::RegisterEvent, "RegisterEvent"},
{0, nullptr, "LeClientReadCharacteristic"},
{1, nullptr, "LeClientReadDescriptor"},
{2, nullptr, "LeClientWriteCharacteristic"},
{3, nullptr, "LeClientWriteDescriptor"},
{4, nullptr, "LeClientRegisterNotification"},
{5, nullptr, "LeClientDeregisterNotification"},
{6, nullptr, "SetLeResponse"},
{7, nullptr, "LeSendIndication"},
{8, nullptr, "GetLeEventInfo"},
{9, &Bt::RegisterBleEvent, "RegisterBleEvent"},
};
// clang-format on
RegisterHandlers(functions);
@@ -39,7 +39,7 @@ public:
}
private:
void RegisterEvent(Kernel::HLERequestContext& ctx) {
void RegisterBleEvent(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_BTM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -55,11 +55,11 @@ public:
explicit BtDrv() : ServiceFramework{"btdrv"} {
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "Unknown"},
{1, nullptr, "Init"},
{2, nullptr, "Enable"},
{3, nullptr, "Disable"},
{4, nullptr, "CleanupAndShutdown"},
{0, nullptr, "InitializeBluetoothDriver"},
{1, nullptr, "InitializeBluetooth"},
{2, nullptr, "EnableBluetooth"},
{3, nullptr, "DisableBluetooth"},
{4, nullptr, "CleanupBluetooth"},
{5, nullptr, "GetAdapterProperties"},
{6, nullptr, "GetAdapterProperty"},
{7, nullptr, "SetAdapterProperty"},
@@ -70,36 +70,91 @@ public:
{12, nullptr, "CancelBond"},
{13, nullptr, "PinReply"},
{14, nullptr, "SspReply"},
{15, nullptr, "Unknown2"},
{16, nullptr, "InitInterfaces"},
{17, nullptr, "HidHostInterface_Connect"},
{18, nullptr, "HidHostInterface_Disconnect"},
{19, nullptr, "HidHostInterface_SendData"},
{20, nullptr, "HidHostInterface_SendData2"},
{21, nullptr, "HidHostInterface_SetReport"},
{22, nullptr, "HidHostInterface_GetReport"},
{23, nullptr, "HidHostInterface_WakeController"},
{24, nullptr, "HidHostInterface_AddPairedDevice"},
{25, nullptr, "HidHostInterface_GetPairedDevice"},
{26, nullptr, "HidHostInterface_CleanupAndShutdown"},
{27, nullptr, "Unknown3"},
{28, nullptr, "ExtInterface_SetTSI"},
{29, nullptr, "ExtInterface_SetBurstMode"},
{30, nullptr, "ExtInterface_SetZeroRetran"},
{31, nullptr, "ExtInterface_SetMcMode"},
{32, nullptr, "ExtInterface_StartLlrMode"},
{33, nullptr, "ExtInterface_ExitLlrMode"},
{34, nullptr, "ExtInterface_SetRadio"},
{35, nullptr, "ExtInterface_SetVisibility"},
{36, nullptr, "Unknown4"},
{37, nullptr, "Unknown5"},
{38, nullptr, "HidHostInterface_GetLatestPlr"},
{39, nullptr, "ExtInterface_GetPendingConnections"},
{40, nullptr, "HidHostInterface_GetChannelMap"},
{41, nullptr, "SetIsBluetoothBoostEnabled"},
{42, nullptr, "GetIsBluetoothBoostEnabled"},
{43, nullptr, "SetIsBluetoothAfhEnabled"},
{44, nullptr, "GetIsBluetoothAfhEnabled"},
{15, nullptr, "GetEventInfo"},
{16, nullptr, "InitializeHid"},
{17, nullptr, "HidConnect"},
{18, nullptr, "HidDisconnect"},
{19, nullptr, "HidSendData"},
{20, nullptr, "HidSendData2"},
{21, nullptr, "HidSetReport"},
{22, nullptr, "HidGetReport"},
{23, nullptr, "HidWakeController"},
{24, nullptr, "HidAddPairedDevice"},
{25, nullptr, "HidGetPairedDevice"},
{26, nullptr, "CleanupHid"},
{27, nullptr, "HidGetEventInfo"},
{28, nullptr, "ExtSetTsi"},
{29, nullptr, "ExtSetBurstMode"},
{30, nullptr, "ExtSetZeroRetran"},
{31, nullptr, "ExtSetMcMode"},
{32, nullptr, "ExtStartLlrMode"},
{33, nullptr, "ExtExitLlrMode"},
{34, nullptr, "ExtSetRadio"},
{35, nullptr, "ExtSetVisibility"},
{36, nullptr, "ExtSetTbfcScan"},
{37, nullptr, "RegisterHidReportEvent"},
{38, nullptr, "HidGetReportEventInfo"},
{39, nullptr, "GetLatestPlr"},
{40, nullptr, "ExtGetPendingConnections"},
{41, nullptr, "GetChannelMap"},
{42, nullptr, "EnableBluetoothBoostSetting"},
{43, nullptr, "IsBluetoothBoostSettingEnabled"},
{44, nullptr, "EnableBluetoothAfhSetting"},
{45, nullptr, "IsBluetoothAfhSettingEnabled"},
{46, nullptr, "InitializeBluetoothLe"},
{47, nullptr, "EnableBluetoothLe"},
{48, nullptr, "DisableBluetoothLe"},
{49, nullptr, "CleanupBluetoothLe"},
{50, nullptr, "SetLeVisibility"},
{51, nullptr, "SetLeConnectionParameter"},
{52, nullptr, "SetLeDefaultConnectionParameter"},
{53, nullptr, "SetLeAdvertiseData"},
{54, nullptr, "SetLeAdvertiseParameter"},
{55, nullptr, "StartLeScan"},
{56, nullptr, "StopLeScan"},
{57, nullptr, "AddLeScanFilterCondition"},
{58, nullptr, "DeleteLeScanFilterCondition"},
{59, nullptr, "DeleteLeScanFilter"},
{60, nullptr, "ClearLeScanFilters"},
{61, nullptr, "EnableLeScanFilter"},
{62, nullptr, "RegisterLeClient"},
{63, nullptr, "UnregisterLeClient"},
{64, nullptr, "UnregisterLeClientAll"},
{65, nullptr, "LeClientConnect"},
{66, nullptr, "LeClientCancelConnection"},
{67, nullptr, "LeClientDisconnect"},
{68, nullptr, "LeClientGetAttributes"},
{69, nullptr, "LeClientDiscoverService"},
{70, nullptr, "LeClientConfigureMtu"},
{71, nullptr, "RegisterLeServer"},
{72, nullptr, "UnregisterLeServer"},
{73, nullptr, "LeServerConnect"},
{74, nullptr, "LeServerDisconnect"},
{75, nullptr, "CreateLeService"},
{76, nullptr, "StartLeService"},
{77, nullptr, "AddLeCharacteristic"},
{78, nullptr, "AddLeDescriptor"},
{79, nullptr, "GetLeCoreEventInfo"},
{80, nullptr, "LeGetFirstCharacteristic"},
{81, nullptr, "LeGetNextCharacteristic"},
{82, nullptr, "LeGetFirstDescriptor"},
{83, nullptr, "LeGetNextDescriptor"},
{84, nullptr, "RegisterLeCoreDataPath"},
{85, nullptr, "UnregisterLeCoreDataPath"},
{86, nullptr, "RegisterLeHidDataPath"},
{87, nullptr, "UnregisterLeHidDataPath"},
{88, nullptr, "RegisterLeDataPath"},
{89, nullptr, "UnregisterLeDataPath"},
{90, nullptr, "LeClientReadCharacteristic"},
{91, nullptr, "LeClientReadDescriptor"},
{92, nullptr, "LeClientWriteCharacteristic"},
{93, nullptr, "LeClientWriteDescriptor"},
{94, nullptr, "LeClientRegisterNotification"},
{95, nullptr, "LeClientDeregisterNotification"},
{96, nullptr, "GetLeHidEventInfo"},
{97, nullptr, "RegisterBleHidEvent"},
{98, nullptr, "SetLeScanParameter"},
{256, nullptr, "GetIsManufacturingMode"}
};
// clang-format on

View File

@@ -20,38 +20,38 @@ public:
explicit IBtmUserCore() : ServiceFramework{"IBtmUserCore"} {
// clang-format off
static const FunctionInfo functions[] = {
{0, &IBtmUserCore::GetScanEvent, "GetScanEvent"},
{1, nullptr, "Unknown1"},
{2, nullptr, "Unknown2"},
{3, nullptr, "Unknown3"},
{4, nullptr, "Unknown4"},
{5, nullptr, "Unknown5"},
{6, nullptr, "Unknown6"},
{7, nullptr, "Unknown7"},
{8, nullptr, "Unknown8"},
{9, nullptr, "Unknown9"},
{10, nullptr, "Unknown10"},
{17, &IBtmUserCore::GetConnectionEvent, "GetConnectionEvent"},
{18, nullptr, "Unknown18"},
{19, nullptr, "Unknown19"},
{20, nullptr, "Unknown20"},
{21, nullptr, "Unknown21"},
{22, nullptr, "Unknown22"},
{23, nullptr, "Unknown23"},
{24, nullptr, "Unknown24"},
{25, nullptr, "Unknown25"},
{26, &IBtmUserCore::GetDiscoveryEvent, "AcquireBleServiceDiscoveryEventImpl"},
{27, nullptr, "Unknown27"},
{28, nullptr, "Unknown28"},
{29, nullptr, "Unknown29"},
{30, nullptr, "Unknown30"},
{31, nullptr, "Unknown31"},
{32, nullptr, "Unknown32"},
{33, &IBtmUserCore::GetConfigEvent, "GetConfigEvent"},
{34, nullptr, "Unknown34"},
{35, nullptr, "Unknown35"},
{36, nullptr, "Unknown36"},
{37, nullptr, "Unknown37"},
{0, &IBtmUserCore::AcquireBleScanEvent, "AcquireBleScanEvent"},
{1, nullptr, "GetBleScanFilterParameter"},
{2, nullptr, "GetBleScanFilterParameter2"},
{3, nullptr, "StartBleScanForGeneral"},
{4, nullptr, "StopBleScanForGeneral"},
{5, nullptr, "GetBleScanResultsForGeneral"},
{6, nullptr, "StartBleScanForPaired"},
{7, nullptr, "StopBleScanForPaired"},
{8, nullptr, "StartBleScanForSmartDevice"},
{9, nullptr, "StopBleScanForSmartDevice"},
{10, nullptr, "GetBleScanResultsForSmartDevice"},
{17, &IBtmUserCore::AcquireBleConnectionEvent, "AcquireBleConnectionEvent"},
{18, nullptr, "BleConnect"},
{19, nullptr, "BleDisconnect"},
{20, nullptr, "BleGetConnectionState"},
{21, nullptr, "AcquireBlePairingEvent"},
{22, nullptr, "BlePairDevice"},
{23, nullptr, "BleUnPairDevice"},
{24, nullptr, "BleUnPairDevice2"},
{25, nullptr, "BleGetPairedDevices"},
{26, &IBtmUserCore::AcquireBleServiceDiscoveryEvent, "AcquireBleServiceDiscoveryEvent"},
{27, nullptr, "GetGattServices"},
{28, nullptr, "GetGattService"},
{29, nullptr, "GetGattIncludedServices"},
{30, nullptr, "GetBelongingGattService"},
{31, nullptr, "GetGattCharacteristics"},
{32, nullptr, "GetGattDescriptors"},
{33, &IBtmUserCore::AcquireBleMtuConfigEvent, "AcquireBleMtuConfigEvent"},
{34, nullptr, "ConfigureBleMtu"},
{35, nullptr, "GetBleMtu"},
{36, nullptr, "RegisterBleGattDataPath"},
{37, nullptr, "UnregisterBleGattDataPath"},
};
// clang-format on
RegisterHandlers(functions);
@@ -68,7 +68,7 @@ public:
}
private:
void GetScanEvent(Kernel::HLERequestContext& ctx) {
void AcquireBleScanEvent(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_BTM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -76,7 +76,7 @@ private:
rb.PushCopyObjects(scan_event.readable);
}
void GetConnectionEvent(Kernel::HLERequestContext& ctx) {
void AcquireBleConnectionEvent(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_BTM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -84,7 +84,7 @@ private:
rb.PushCopyObjects(connection_event.readable);
}
void GetDiscoveryEvent(Kernel::HLERequestContext& ctx) {
void AcquireBleServiceDiscoveryEvent(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_BTM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -92,7 +92,7 @@ private:
rb.PushCopyObjects(service_discovery.readable);
}
void GetConfigEvent(Kernel::HLERequestContext& ctx) {
void AcquireBleMtuConfigEvent(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_BTM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -111,14 +111,14 @@ public:
explicit BTM_USR() : ServiceFramework{"btm:u"} {
// clang-format off
static const FunctionInfo functions[] = {
{0, &BTM_USR::GetCoreImpl, "GetCoreImpl"},
{0, &BTM_USR::GetCore, "GetCore"},
};
// clang-format on
RegisterHandlers(functions);
}
private:
void GetCoreImpl(Kernel::HLERequestContext& ctx) {
void GetCore(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_BTM, "called");
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
@@ -134,26 +134,64 @@ public:
static const FunctionInfo functions[] = {
{0, nullptr, "Unknown1"},
{1, nullptr, "Unknown2"},
{2, nullptr, "RegisterSystemEventForConnectedDeviceConditionImpl"},
{2, nullptr, "RegisterSystemEventForConnectedDeviceCondition"},
{3, nullptr, "Unknown3"},
{4, nullptr, "Unknown4"},
{5, nullptr, "Unknown5"},
{6, nullptr, "Unknown6"},
{7, nullptr, "Unknown7"},
{8, nullptr, "RegisterSystemEventForRegisteredDeviceInfoImpl"},
{8, nullptr, "RegisterSystemEventForRegisteredDeviceInfo"},
{9, nullptr, "Unknown8"},
{10, nullptr, "Unknown9"},
{11, nullptr, "Unknown10"},
{12, nullptr, "Unknown11"},
{13, nullptr, "Unknown12"},
{14, nullptr, "EnableRadioImpl"},
{15, nullptr, "DisableRadioImpl"},
{14, nullptr, "EnableRadio"},
{15, nullptr, "DisableRadio"},
{16, nullptr, "Unknown13"},
{17, nullptr, "Unknown14"},
{18, nullptr, "Unknown15"},
{19, nullptr, "Unknown16"},
{20, nullptr, "Unknown17"},
{21, nullptr, "Unknown18"},
{22, nullptr, "Unknown19"},
{23, nullptr, "Unknown20"},
{24, nullptr, "Unknown21"},
{25, nullptr, "Unknown22"},
{26, nullptr, "Unknown23"},
{27, nullptr, "Unknown24"},
{28, nullptr, "Unknown25"},
{29, nullptr, "Unknown26"},
{30, nullptr, "Unknown27"},
{31, nullptr, "Unknown28"},
{32, nullptr, "Unknown29"},
{33, nullptr, "Unknown30"},
{34, nullptr, "Unknown31"},
{35, nullptr, "Unknown32"},
{36, nullptr, "Unknown33"},
{37, nullptr, "Unknown34"},
{38, nullptr, "Unknown35"},
{39, nullptr, "Unknown36"},
{40, nullptr, "Unknown37"},
{41, nullptr, "Unknown38"},
{42, nullptr, "Unknown39"},
{43, nullptr, "Unknown40"},
{44, nullptr, "Unknown41"},
{45, nullptr, "Unknown42"},
{46, nullptr, "Unknown43"},
{47, nullptr, "Unknown44"},
{48, nullptr, "Unknown45"},
{49, nullptr, "Unknown46"},
{50, nullptr, "Unknown47"},
{51, nullptr, "Unknown48"},
{52, nullptr, "Unknown49"},
{53, nullptr, "Unknown50"},
{54, nullptr, "Unknown51"},
{55, nullptr, "Unknown52"},
{56, nullptr, "Unknown53"},
{57, nullptr, "Unknown54"},
{58, nullptr, "Unknown55"},
{59, nullptr, "Unknown56"},
};
// clang-format on
@@ -166,7 +204,7 @@ public:
explicit BTM_DBG() : ServiceFramework{"btm:dbg"} {
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "RegisterSystemEventForDiscoveryImpl"},
{0, nullptr, "RegisterSystemEventForDiscovery"},
{1, nullptr, "Unknown1"},
{2, nullptr, "Unknown2"},
{3, nullptr, "Unknown3"},
@@ -175,6 +213,10 @@ public:
{6, nullptr, "Unknown6"},
{7, nullptr, "Unknown7"},
{8, nullptr, "Unknown8"},
{9, nullptr, "Unknown9"},
{10, nullptr, "Unknown10"},
{11, nullptr, "Unknown11"},
{12, nullptr, "Unknown11"},
};
// clang-format on
@@ -187,16 +229,16 @@ public:
explicit IBtmSystemCore() : ServiceFramework{"IBtmSystemCore"} {
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "StartGamepadPairingImpl"},
{1, nullptr, "CancelGamepadPairingImpl"},
{2, nullptr, "ClearGamepadPairingDatabaseImpl"},
{3, nullptr, "GetPairedGamepadCountImpl"},
{4, nullptr, "EnableRadioImpl"},
{5, nullptr, "DisableRadioImpl"},
{6, nullptr, "GetRadioOnOffImpl"},
{7, nullptr, "AcquireRadioEventImpl"},
{8, nullptr, "AcquireGamepadPairingEventImpl"},
{9, nullptr, "IsGamepadPairingStartedImpl"},
{0, nullptr, "StartGamepadPairing"},
{1, nullptr, "CancelGamepadPairing"},
{2, nullptr, "ClearGamepadPairingDatabase"},
{3, nullptr, "GetPairedGamepadCount"},
{4, nullptr, "EnableRadio"},
{5, nullptr, "DisableRadio"},
{6, nullptr, "GetRadioOnOff"},
{7, nullptr, "AcquireRadioEvent"},
{8, nullptr, "AcquireGamepadPairingEvent"},
{9, nullptr, "IsGamepadPairingStarted"},
};
// clang-format on
@@ -209,7 +251,7 @@ public:
explicit BTM_SYS() : ServiceFramework{"btm:sys"} {
// clang-format off
static const FunctionInfo functions[] = {
{0, &BTM_SYS::GetCoreImpl, "GetCoreImpl"},
{0, &BTM_SYS::GetCore, "GetCore"},
};
// clang-format on
@@ -217,7 +259,7 @@ public:
}
private:
void GetCoreImpl(Kernel::HLERequestContext& ctx) {
void GetCore(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_BTM, "called");
IPC::ResponseBuilder rb{ctx, 2, 0, 1};

View File

@@ -627,8 +627,8 @@ private:
FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "MountContent"},
{1, &FSP_SRV::Initialize, "Initialize"},
{0, nullptr, "OpenFileSystem"},
{1, &FSP_SRV::SetCurrentProcess, "SetCurrentProcess"},
{2, nullptr, "OpenDataFileSystemByCurrentProcess"},
{7, &FSP_SRV::OpenFileSystemWithPatch, "OpenFileSystemWithPatch"},
{8, nullptr, "OpenFileSystemWithId"},
@@ -637,10 +637,10 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
{12, nullptr, "OpenBisStorage"},
{13, nullptr, "InvalidateBisCache"},
{17, nullptr, "OpenHostFileSystem"},
{18, &FSP_SRV::MountSdCard, "MountSdCard"},
{18, &FSP_SRV::OpenSdCardFileSystem, "OpenSdCardFileSystem"},
{19, nullptr, "FormatSdCardFileSystem"},
{21, nullptr, "DeleteSaveDataFileSystem"},
{22, &FSP_SRV::CreateSaveData, "CreateSaveData"},
{22, &FSP_SRV::CreateSaveDataFileSystem, "CreateSaveDataFileSystem"},
{23, nullptr, "CreateSaveDataFileSystemBySystemSaveDataId"},
{24, nullptr, "RegisterSaveDataFileSystemAtomicDeletion"},
{25, nullptr, "DeleteSaveDataFileSystemBySaveDataSpaceId"},
@@ -652,7 +652,8 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
{32, nullptr, "ExtendSaveDataFileSystem"},
{33, nullptr, "DeleteCacheStorage"},
{34, nullptr, "GetCacheStorageSize"},
{51, &FSP_SRV::MountSaveData, "MountSaveData"},
{35, nullptr, "CreateSaveDataFileSystemByHashSalt"},
{51, &FSP_SRV::OpenSaveDataFileSystem, "OpenSaveDataFileSystem"},
{52, nullptr, "OpenSaveDataFileSystemBySystemSaveDataId"},
{53, &FSP_SRV::OpenReadOnlySaveDataFileSystem, "OpenReadOnlySaveDataFileSystem"},
{57, nullptr, "ReadSaveDataFileSystemExtraDataBySaveDataSpaceId"},
@@ -664,21 +665,26 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
{64, nullptr, "OpenSaveDataInternalStorageFileSystem"},
{65, nullptr, "UpdateSaveDataMacForDebug"},
{66, nullptr, "WriteSaveDataFileSystemExtraData2"},
{67, nullptr, "FindSaveDataWithFilter"},
{68, nullptr, "OpenSaveDataInfoReaderBySaveDataFilter"},
{80, nullptr, "OpenSaveDataMetaFile"},
{81, nullptr, "OpenSaveDataTransferManager"},
{82, nullptr, "OpenSaveDataTransferManagerVersion2"},
{83, nullptr, "OpenSaveDataTransferProhibiterForCloudBackUp"},
{84, nullptr, "ListApplicationAccessibleSaveDataOwnerId"},
{100, nullptr, "OpenImageDirectoryFileSystem"},
{110, nullptr, "OpenContentStorageFileSystem"},
{120, nullptr, "OpenCloudBackupWorkStorageFileSystem"},
{200, &FSP_SRV::OpenDataStorageByCurrentProcess, "OpenDataStorageByCurrentProcess"},
{201, nullptr, "OpenDataStorageByProgramId"},
{202, &FSP_SRV::OpenDataStorageByDataId, "OpenDataStorageByDataId"},
{203, &FSP_SRV::OpenRomStorage, "OpenRomStorage"},
{203, &FSP_SRV::OpenPatchDataStorageByCurrentProcess, "OpenPatchDataStorageByCurrentProcess"},
{400, nullptr, "OpenDeviceOperator"},
{500, nullptr, "OpenSdCardDetectionEventNotifier"},
{501, nullptr, "OpenGameCardDetectionEventNotifier"},
{510, nullptr, "OpenSystemDataUpdateEventNotifier"},
{511, nullptr, "NotifySystemDataUpdateEvent"},
{520, nullptr, "SimulateGameCardDetectionEvent"},
{600, nullptr, "SetCurrentPosixTime"},
{601, nullptr, "QuerySaveDataTotalSize"},
{602, nullptr, "VerifySaveDataFileSystem"},
@@ -717,6 +723,8 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
{1008, nullptr, "OpenRegisteredUpdatePartition"},
{1009, nullptr, "GetAndClearMemoryReportInfo"},
{1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"},
{1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"},
{1200, nullptr, "OpenMultiCommitManager"},
};
// clang-format on
RegisterHandlers(functions);
@@ -724,7 +732,7 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
FSP_SRV::~FSP_SRV() = default;
void FSP_SRV::Initialize(Kernel::HLERequestContext& ctx) {
void FSP_SRV::SetCurrentProcess(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_FS, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2};
@@ -743,7 +751,7 @@ void FSP_SRV::OpenFileSystemWithPatch(Kernel::HLERequestContext& ctx) {
rb.Push(ResultCode(-1));
}
void FSP_SRV::MountSdCard(Kernel::HLERequestContext& ctx) {
void FSP_SRV::OpenSdCardFileSystem(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_FS, "called");
IFileSystem filesystem(OpenSDMC().Unwrap());
@@ -753,7 +761,7 @@ void FSP_SRV::MountSdCard(Kernel::HLERequestContext& ctx) {
rb.PushIpcInterface<IFileSystem>(std::move(filesystem));
}
void FSP_SRV::CreateSaveData(Kernel::HLERequestContext& ctx) {
void FSP_SRV::CreateSaveDataFileSystem(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
auto save_struct = rp.PopRaw<FileSys::SaveDataDescriptor>();
@@ -767,7 +775,7 @@ void FSP_SRV::CreateSaveData(Kernel::HLERequestContext& ctx) {
rb.Push(RESULT_SUCCESS);
}
void FSP_SRV::MountSaveData(Kernel::HLERequestContext& ctx) {
void FSP_SRV::OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
auto space_id = rp.PopRaw<FileSys::SaveDataSpaceId>();
@@ -793,7 +801,7 @@ void FSP_SRV::MountSaveData(Kernel::HLERequestContext& ctx) {
void FSP_SRV::OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_FS, "(STUBBED) called, delegating to 51 OpenSaveDataFilesystem");
MountSaveData(ctx);
OpenSaveDataFileSystem(ctx);
}
void FSP_SRV::OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx) {
@@ -881,7 +889,7 @@ void FSP_SRV::OpenDataStorageByDataId(Kernel::HLERequestContext& ctx) {
rb.PushIpcInterface<IStorage>(std::move(storage));
}
void FSP_SRV::OpenRomStorage(Kernel::HLERequestContext& ctx) {
void FSP_SRV::OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
auto storage_id = rp.PopRaw<FileSys::StorageId>();

View File

@@ -19,17 +19,17 @@ public:
~FSP_SRV() override;
private:
void Initialize(Kernel::HLERequestContext& ctx);
void SetCurrentProcess(Kernel::HLERequestContext& ctx);
void OpenFileSystemWithPatch(Kernel::HLERequestContext& ctx);
void MountSdCard(Kernel::HLERequestContext& ctx);
void CreateSaveData(Kernel::HLERequestContext& ctx);
void MountSaveData(Kernel::HLERequestContext& ctx);
void OpenSdCardFileSystem(Kernel::HLERequestContext& ctx);
void CreateSaveDataFileSystem(Kernel::HLERequestContext& ctx);
void OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx);
void OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx);
void OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx);
void GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx);
void OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
void OpenDataStorageByDataId(Kernel::HLERequestContext& ctx);
void OpenRomStorage(Kernel::HLERequestContext& ctx);
void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
FileSys::VirtualFile romfs;
};

View File

@@ -40,10 +40,10 @@ public:
{6, nullptr, "CloseContentStorageForcibly"},
{7, nullptr, "CloseContentMetaDatabaseForcibly"},
{8, nullptr, "CleanupContentMetaDatabase"},
{9, nullptr, "OpenContentStorage2"},
{10, nullptr, "CloseContentStorage"},
{11, nullptr, "OpenContentMetaDatabase2"},
{12, nullptr, "CloseContentMetaDatabase"},
{9, nullptr, "ActivateContentStorage"},
{10, nullptr, "InactivateContentStorage"},
{11, nullptr, "ActivateContentMetaDatabase"},
{12, nullptr, "InactivateContentMetaDatabase"},
};
// clang-format on

View File

@@ -43,7 +43,7 @@ public:
{11, nullptr, "CalculateApplicationOccupiedSize"},
{16, nullptr, "PushApplicationRecord"},
{17, nullptr, "ListApplicationRecordContentMeta"},
{19, nullptr, "LaunchApplication"},
{19, nullptr, "LaunchApplicationOld"},
{21, nullptr, "GetApplicationContentPath"},
{22, nullptr, "TerminateApplication"},
{23, nullptr, "ResolveApplicationContentPath"},
@@ -96,10 +96,10 @@ public:
{86, nullptr, "EnableApplicationCrashReport"},
{87, nullptr, "IsApplicationCrashReportEnabled"},
{90, nullptr, "BoostSystemMemoryResourceLimit"},
{91, nullptr, "Unknown1"},
{92, nullptr, "Unknown2"},
{91, nullptr, "DeprecatedLaunchApplication"},
{92, nullptr, "GetRunningApplicationProgramId"},
{93, nullptr, "GetMainApplicationProgramIndex"},
{94, nullptr, "LaunchApplication2"},
{94, nullptr, "LaunchApplication"},
{95, nullptr, "GetApplicationLaunchInfo"},
{96, nullptr, "AcquireApplicationLaunchInfo"},
{97, nullptr, "GetMainApplicationProgramIndex2"},
@@ -163,7 +163,7 @@ public:
{907, nullptr, "WithdrawApplicationUpdateRequest"},
{908, nullptr, "ListApplicationRecordInstalledContentMeta"},
{909, nullptr, "WithdrawCleanupAddOnContentsWithNoRightsRecommendation"},
{910, nullptr, "Unknown3"},
{910, nullptr, "HasApplicationRecord"},
{911, nullptr, "SetPreInstalledApplication"},
{912, nullptr, "ClearPreInstalledApplicationFlag"},
{1000, nullptr, "RequestVerifyApplicationDeprecated"},
@@ -219,10 +219,10 @@ public:
{2015, nullptr, "CompareSystemDeliveryInfo"},
{2016, nullptr, "ListNotCommittedContentMeta"},
{2017, nullptr, "CreateDownloadTask"},
{2018, nullptr, "Unknown4"},
{2050, nullptr, "Unknown5"},
{2100, nullptr, "Unknown6"},
{2101, nullptr, "Unknown7"},
{2018, nullptr, "GetApplicationDeliveryInfoHash"},
{2050, nullptr, "GetApplicationRightsOnClient"},
{2100, nullptr, "GetApplicationTerminateResult"},
{2101, nullptr, "GetRawApplicationTerminateResult"},
{2150, nullptr, "CreateRightsEnvironment"},
{2151, nullptr, "DestroyRightsEnvironment"},
{2152, nullptr, "ActivateRightsEnvironment"},
@@ -237,10 +237,10 @@ public:
{2182, nullptr, "SetActiveRightsContextUsingStateToRightsEnvironment"},
{2190, nullptr, "GetRightsEnvironmentHandleForApplication"},
{2199, nullptr, "GetRightsEnvironmentCountForDebug"},
{2200, nullptr, "Unknown8"},
{2201, nullptr, "Unknown9"},
{2250, nullptr, "Unknown10"},
{2300, nullptr, "Unknown11"},
{2200, nullptr, "GetGameCardApplicationCopyIdentifier"},
{2201, nullptr, "GetInstalledApplicationCopyIdentifier"},
{2250, nullptr, "RequestReportActiveELicence"},
{2300, nullptr, "ListEventLog"},
};
// clang-format on
@@ -355,6 +355,7 @@ public:
static const FunctionInfo functions[] = {
{21, nullptr, "GetApplicationContentPath"},
{23, nullptr, "ResolveApplicationContentPath"},
{93, nullptr, "GetRunningApplicationProgramId"},
};
// clang-format on
@@ -389,6 +390,11 @@ public:
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "RequestLinkDevice"},
{1, nullptr, "RequestCleanupAllPreInstalledApplications"},
{2, nullptr, "RequestCleanupPreInstalledApplication"},
{3, nullptr, "RequestSyncRights"},
{4, nullptr, "RequestUnlinkDevice"},
{5, nullptr, "RequestRevokeAllELicense"},
};
// clang-format on
@@ -403,7 +409,7 @@ public:
static const FunctionInfo functions[] = {
{100, nullptr, "ResetToFactorySettings"},
{101, nullptr, "ResetToFactorySettingsWithoutUserSaveData"},
{102, nullptr, "ResetToFactorySettingsForRefurbishment "},
{102, nullptr, "ResetToFactorySettingsForRefurbishment"},
};
// clang-format on

View File

@@ -5,7 +5,6 @@
#include <algorithm>
#include <optional>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
@@ -22,7 +21,6 @@
#include "core/hle/service/nvflinger/nvflinger.h"
#include "core/perf_stats.h"
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
namespace Service::NVFlinger {
@@ -30,12 +28,6 @@ constexpr std::size_t SCREEN_REFRESH_RATE = 60;
constexpr u64 frame_ticks = static_cast<u64>(CoreTiming::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
NVFlinger::NVFlinger() {
// Add the different displays to the list of displays.
displays.emplace_back(0, "Default");
displays.emplace_back(1, "External");
displays.emplace_back(2, "Edid");
displays.emplace_back(3, "Internal");
// Schedule the screen composition events
composition_event =
CoreTiming::RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
@@ -54,66 +46,120 @@ void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
nvdrv = std::move(instance);
}
u64 NVFlinger::OpenDisplay(std::string_view name) {
LOG_WARNING(Service, "Opening display {}", name);
std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
LOG_DEBUG(Service, "Opening \"{}\" display", name);
// TODO(Subv): Currently we only support the Default display.
ASSERT(name == "Default");
auto itr = std::find_if(displays.begin(), displays.end(),
[&](const Display& display) { return display.name == name; });
ASSERT(itr != displays.end());
const auto itr = std::find_if(displays.begin(), displays.end(),
[&](const Display& display) { return display.name == name; });
if (itr == displays.end()) {
return {};
}
return itr->id;
}
u64 NVFlinger::CreateLayer(u64 display_id) {
auto& display = GetDisplay(display_id);
std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
auto* const display = FindDisplay(display_id);
ASSERT_MSG(display.layers.empty(), "Only one layer is supported per display at the moment");
if (display == nullptr) {
return {};
}
u64 layer_id = next_layer_id++;
u32 buffer_queue_id = next_buffer_queue_id++;
ASSERT_MSG(display->layers.empty(), "Only one layer is supported per display at the moment");
const u64 layer_id = next_layer_id++;
const u32 buffer_queue_id = next_buffer_queue_id++;
auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id);
display.layers.emplace_back(layer_id, buffer_queue);
display->layers.emplace_back(layer_id, buffer_queue);
buffer_queues.emplace_back(std::move(buffer_queue));
return layer_id;
}
u32 NVFlinger::GetBufferQueueId(u64 display_id, u64 layer_id) {
const auto& layer = GetLayer(display_id, layer_id);
return layer.buffer_queue->GetId();
std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const {
const auto* const layer = FindLayer(display_id, layer_id);
if (layer == nullptr) {
return {};
}
return layer->buffer_queue->GetId();
}
Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::GetVsyncEvent(u64 display_id) {
return GetDisplay(display_id).vsync_event.readable;
Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const {
auto* const display = FindDisplay(display_id);
if (display == nullptr) {
return nullptr;
}
return display->vsync_event.readable;
}
std::shared_ptr<BufferQueue> NVFlinger::GetBufferQueue(u32 id) const {
auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
[&](const auto& queue) { return queue->GetId() == id; });
std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const {
const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
[&](const auto& queue) { return queue->GetId() == id; });
ASSERT(itr != buffer_queues.end());
return *itr;
}
Display& NVFlinger::GetDisplay(u64 display_id) {
auto itr = std::find_if(displays.begin(), displays.end(),
[&](const Display& display) { return display.id == display_id; });
Display* NVFlinger::FindDisplay(u64 display_id) {
const auto itr = std::find_if(displays.begin(), displays.end(),
[&](const Display& display) { return display.id == display_id; });
ASSERT(itr != displays.end());
return *itr;
if (itr == displays.end()) {
return nullptr;
}
return &*itr;
}
Layer& NVFlinger::GetLayer(u64 display_id, u64 layer_id) {
auto& display = GetDisplay(display_id);
const Display* NVFlinger::FindDisplay(u64 display_id) const {
const auto itr = std::find_if(displays.begin(), displays.end(),
[&](const Display& display) { return display.id == display_id; });
auto itr = std::find_if(display.layers.begin(), display.layers.end(),
[&](const Layer& layer) { return layer.id == layer_id; });
if (itr == displays.end()) {
return nullptr;
}
ASSERT(itr != display.layers.end());
return *itr;
return &*itr;
}
Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
auto* const display = FindDisplay(display_id);
if (display == nullptr) {
return nullptr;
}
const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
[&](const Layer& layer) { return layer.id == layer_id; });
if (itr == display->layers.end()) {
return nullptr;
}
return &*itr;
}
const Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
const auto* const display = FindDisplay(display_id);
if (display == nullptr) {
return nullptr;
}
const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
[&](const Layer& layer) { return layer.id == layer_id; });
if (itr == display->layers.end()) {
return nullptr;
}
return &*itr;
}
void NVFlinger::Compose() {
@@ -145,7 +191,7 @@ void NVFlinger::Compose() {
continue;
}
auto& igbp_buffer = buffer->get().igbp_buffer;
const auto& igbp_buffer = buffer->get().igbp_buffer;
// Now send the buffer to the GPU for drawing.
// TODO(Subv): Support more than just disp0. The display device selection is probably based

View File

@@ -4,7 +4,9 @@
#pragma once
#include <array>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <vector>
@@ -56,35 +58,55 @@ public:
/// Sets the NVDrv module instance to use to send buffers to the GPU.
void SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance);
/// Opens the specified display and returns the id.
u64 OpenDisplay(std::string_view name);
/// Opens the specified display and returns the ID.
///
/// If an invalid display name is provided, then an empty optional is returned.
std::optional<u64> OpenDisplay(std::string_view name);
/// Creates a layer on the specified display and returns the layer id.
u64 CreateLayer(u64 display_id);
/// Creates a layer on the specified display and returns the layer ID.
///
/// If an invalid display ID is specified, then an empty optional is returned.
std::optional<u64> CreateLayer(u64 display_id);
/// Gets the buffer queue id of the specified layer in the specified display.
u32 GetBufferQueueId(u64 display_id, u64 layer_id);
/// Finds the buffer queue ID of the specified layer in the specified display.
///
/// If an invalid display ID or layer ID is provided, then an empty optional is returned.
std::optional<u32> FindBufferQueueId(u64 display_id, u64 layer_id) const;
/// Gets the vsync event for the specified display.
Kernel::SharedPtr<Kernel::ReadableEvent> GetVsyncEvent(u64 display_id);
///
/// If an invalid display ID is provided, then nullptr is returned.
Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const;
/// Obtains a buffer queue identified by the id.
std::shared_ptr<BufferQueue> GetBufferQueue(u32 id) const;
/// Obtains a buffer queue identified by the ID.
std::shared_ptr<BufferQueue> FindBufferQueue(u32 id) const;
/// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
/// finished.
void Compose();
private:
/// Returns the display identified by the specified id.
Display& GetDisplay(u64 display_id);
/// Finds the display identified by the specified ID.
Display* FindDisplay(u64 display_id);
/// Returns the layer identified by the specified id in the desired display.
Layer& GetLayer(u64 display_id, u64 layer_id);
/// Finds the display identified by the specified ID.
const Display* FindDisplay(u64 display_id) const;
/// Finds the layer identified by the specified ID in the desired display.
Layer* FindLayer(u64 display_id, u64 layer_id);
/// Finds the layer identified by the specified ID in the desired display.
const Layer* FindLayer(u64 display_id, u64 layer_id) const;
std::shared_ptr<Nvidia::Module> nvdrv;
std::vector<Display> displays;
std::array<Display, 5> displays{{
{0, "Default"},
{1, "External"},
{2, "Edid"},
{3, "Internal"},
{4, "Null"},
}};
std::vector<std::shared_ptr<BufferQueue>> buffer_queues;
/// Id to use for the next layer that is created, this counter is shared among all displays.

View File

@@ -17,13 +17,13 @@ public:
explicit PSC_C() : ServiceFramework{"psc:c"} {
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "Unknown1"},
{1, nullptr, "Unknown2"},
{2, nullptr, "Unknown3"},
{3, nullptr, "Unknown4"},
{4, nullptr, "Unknown5"},
{5, nullptr, "Unknown6"},
{6, nullptr, "Unknown7"},
{0, nullptr, "Initialize"},
{1, nullptr, "DispatchRequest"},
{2, nullptr, "GetResult"},
{3, nullptr, "GetState"},
{4, nullptr, "Cancel"},
{5, nullptr, "PrintModuleInformation"},
{6, nullptr, "GetModuleInformation"},
};
// clang-format on
@@ -39,7 +39,8 @@ public:
{0, nullptr, "Initialize"},
{1, nullptr, "GetRequest"},
{2, nullptr, "Acknowledge"},
{3, nullptr, "Unknown1"},
{3, nullptr, "Finalize"},
{4, nullptr, "AcknowledgeEx"},
};
// clang-format on

View File

@@ -34,6 +34,7 @@ namespace Service::VI {
constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1};
constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6};
constexpr ResultCode ERR_NOT_FOUND{ErrorModule::VI, 7};
struct DisplayInfo {
/// The name of this particular display.
@@ -524,7 +525,7 @@ private:
LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
static_cast<u32>(transaction), flags);
auto buffer_queue = nv_flinger->GetBufferQueue(id);
auto buffer_queue = nv_flinger->FindBufferQueue(id);
if (transaction == TransactionId::Connect) {
IGBPConnectRequestParcel request{ctx.ReadBuffer()};
@@ -558,7 +559,7 @@ private:
[=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
Kernel::ThreadWakeupReason reason) {
// Repeat TransactParcel DequeueBuffer when a buffer is available
auto buffer_queue = nv_flinger->GetBufferQueue(id);
auto buffer_queue = nv_flinger->FindBufferQueue(id);
std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer.");
@@ -628,7 +629,7 @@ private:
LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown);
const auto buffer_queue = nv_flinger->GetBufferQueue(id);
const auto buffer_queue = nv_flinger->FindBufferQueue(id);
// TODO(Subv): Find out what this actually is.
IPC::ResponseBuilder rb{ctx, 2, 1};
@@ -838,11 +839,16 @@ private:
"(STUBBED) called. unknown=0x{:08X}, display=0x{:016X}, aruid=0x{:016X}",
unknown, display, aruid);
const u64 layer_id = nv_flinger->CreateLayer(display);
const auto layer_id = nv_flinger->CreateLayer(display);
if (!layer_id) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ERR_NOT_FOUND);
return;
}
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(RESULT_SUCCESS);
rb.Push(layer_id);
rb.Push(*layer_id);
}
void AddToLayerStack(Kernel::HLERequestContext& ctx) {
@@ -950,9 +956,16 @@ private:
ASSERT_MSG(name == "Default", "Non-default displays aren't supported yet");
const auto display_id = nv_flinger->OpenDisplay(name);
if (!display_id) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ERR_NOT_FOUND);
return;
}
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(RESULT_SUCCESS);
rb.Push<u64>(nv_flinger->OpenDisplay(name));
rb.Push<u64>(*display_id);
}
void CloseDisplay(Kernel::HLERequestContext& ctx) {
@@ -1043,10 +1056,21 @@ private:
LOG_DEBUG(Service_VI, "called. layer_id=0x{:016X}, aruid=0x{:016X}", layer_id, aruid);
const u64 display_id = nv_flinger->OpenDisplay(display_name);
const u32 buffer_queue_id = nv_flinger->GetBufferQueueId(display_id, layer_id);
const auto display_id = nv_flinger->OpenDisplay(display_name);
if (!display_id) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ERR_NOT_FOUND);
return;
}
NativeWindow native_window{buffer_queue_id};
const auto buffer_queue_id = nv_flinger->FindBufferQueueId(*display_id, layer_id);
if (!buffer_queue_id) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ERR_NOT_FOUND);
return;
}
NativeWindow native_window{*buffer_queue_id};
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(RESULT_SUCCESS);
rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
@@ -1062,13 +1086,24 @@ private:
// TODO(Subv): What's the difference between a Stray and a Managed layer?
const u64 layer_id = nv_flinger->CreateLayer(display_id);
const u32 buffer_queue_id = nv_flinger->GetBufferQueueId(display_id, layer_id);
const auto layer_id = nv_flinger->CreateLayer(display_id);
if (!layer_id) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ERR_NOT_FOUND);
return;
}
NativeWindow native_window{buffer_queue_id};
const auto buffer_queue_id = nv_flinger->FindBufferQueueId(display_id, *layer_id);
if (!buffer_queue_id) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ERR_NOT_FOUND);
return;
}
NativeWindow native_window{*buffer_queue_id};
IPC::ResponseBuilder rb{ctx, 6};
rb.Push(RESULT_SUCCESS);
rb.Push(layer_id);
rb.Push(*layer_id);
rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
}
@@ -1088,7 +1123,12 @@ private:
LOG_WARNING(Service_VI, "(STUBBED) called. display_id=0x{:016X}", display_id);
const auto vsync_event = nv_flinger->GetVsyncEvent(display_id);
const auto vsync_event = nv_flinger->FindVsyncEvent(display_id);
if (!vsync_event) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ERR_NOT_FOUND);
return;
}
IPC::ResponseBuilder rb{ctx, 2, 1};
rb.Push(RESULT_SUCCESS);

View File

@@ -391,6 +391,7 @@ struct Values {
float resolution_factor;
bool use_frame_limit;
u16 frame_limit;
bool use_disk_shader_cache;
bool use_accurate_gpu_emulation;
float bg_red;

View File

@@ -158,6 +158,8 @@ TelemetrySession::TelemetrySession() {
AddField(Telemetry::FieldType::UserConfig, "Renderer_UseFrameLimit",
Settings::values.use_frame_limit);
AddField(Telemetry::FieldType::UserConfig, "Renderer_FrameLimit", Settings::values.frame_limit);
AddField(Telemetry::FieldType::UserConfig, "Renderer_UseDiskShaderCache",
Settings::values.use_disk_shader_cache);
AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
Settings::values.use_accurate_gpu_emulation);
AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",

View File

@@ -44,6 +44,8 @@ add_library(video_core STATIC
renderer_opengl/gl_shader_cache.h
renderer_opengl/gl_shader_decompiler.cpp
renderer_opengl/gl_shader_decompiler.h
renderer_opengl/gl_shader_disk_cache.cpp
renderer_opengl/gl_shader_disk_cache.h
renderer_opengl/gl_shader_gen.cpp
renderer_opengl/gl_shader_gen.h
renderer_opengl/gl_shader_manager.cpp
@@ -102,4 +104,4 @@ add_library(video_core STATIC
create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)
target_link_libraries(video_core PRIVATE glad)
target_link_libraries(video_core PRIVATE glad lz4_static)

View File

@@ -35,8 +35,10 @@ void DmaPusher::DispatchCalls() {
bool DmaPusher::Step() {
if (dma_get != dma_put) {
// Push buffer non-empty, read a word
const CommandHeader command_header{
Memory::Read32(*gpu.MemoryManager().GpuToCpuAddress(dma_get))};
const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
ASSERT_MSG(address, "Invalid GPU address");
const CommandHeader command_header{Memory::Read32(*address)};
dma_get += sizeof(u32);

View File

@@ -83,7 +83,7 @@ private:
u32 subchannel; ///< Current subchannel
u32 method_count; ///< Current method count
u32 length_pending; ///< Large NI command length pending
bool non_incrementing; ///< Current commands NI flag
bool non_incrementing; ///< Current command's NI flag
};
DmaState dma_state{};

View File

@@ -21,7 +21,9 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
regs.reg_array[method_call.method] = method_call.argument;
switch (method_call.method) {
case FERMI2D_REG_INDEX(trigger): {
// Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit,
// so trigger on the second 32-bit write.
case FERMI2D_REG_INDEX(blit_src_y) + 1: {
HandleSurfaceCopy();
break;
}
@@ -32,55 +34,23 @@ void Fermi2D::HandleSurfaceCopy() {
LOG_WARNING(HW_GPU, "Requested a surface copy with operation {}",
static_cast<u32>(regs.operation));
const GPUVAddr source = regs.src.Address();
const GPUVAddr dest = regs.dst.Address();
// TODO(Subv): Only same-format and same-size copies are allowed for now.
ASSERT(regs.src.format == regs.dst.format);
ASSERT(regs.src.width * regs.src.height == regs.dst.width * regs.dst.height);
// TODO(Subv): Only raw copies are implemented.
ASSERT(regs.operation == Regs::Operation::SrcCopy);
const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source);
const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest);
const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)};
const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)};
const u32 src_blit_x2{
static_cast<u32>((regs.blit_src_x + (regs.blit_dst_width * regs.blit_du_dx)) >> 32)};
const u32 src_blit_y2{
static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)};
u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format);
u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);
const MathUtil::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
const MathUtil::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
regs.blit_dst_x + regs.blit_dst_width,
regs.blit_dst_y + regs.blit_dst_height};
if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) {
// All copies here update the main memory, so mark all rasterizer states as invalid.
Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height);
// We have to invalidate the destination region to evict any outdated surfaces from the
// cache. We do this before actually writing the new data because the destination address
// might contain a dirty surface that will have to be written back to memory.
rasterizer.InvalidateRegion(dest_cpu,
dst_bytes_per_pixel * regs.dst.width * regs.dst.height);
if (regs.src.linear == regs.dst.linear) {
// If the input layout and the output layout are the same, just perform a raw copy.
ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight());
Memory::CopyBlock(dest_cpu, source_cpu,
src_bytes_per_pixel * regs.dst.width * regs.dst.height);
return;
}
u8* src_buffer = Memory::GetPointer(source_cpu);
u8* dst_buffer = Memory::GetPointer(dest_cpu);
if (!regs.src.linear && regs.dst.linear) {
// If the input is tiled and the output is linear, deswizzle the input and copy it over.
Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
src_bytes_per_pixel, dst_bytes_per_pixel, src_buffer,
dst_buffer, true, regs.src.BlockHeight(),
regs.src.BlockDepth(), 0);
} else {
// If the input is linear and the output is tiled, swizzle the input and copy it over.
Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
src_bytes_per_pixel, dst_bytes_per_pixel, dst_buffer,
src_buffer, false, regs.dst.BlockHeight(),
regs.dst.BlockDepth(), 0);
}
if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) {
UNIMPLEMENTED();
}
}

View File

@@ -94,12 +94,22 @@ public:
Operation operation;
INSERT_PADDING_WORDS(0x9);
INSERT_PADDING_WORDS(0x177);
// TODO(Subv): This is only a guess.
u32 trigger;
u32 blit_control;
INSERT_PADDING_WORDS(0x1A3);
INSERT_PADDING_WORDS(0x8);
u32 blit_dst_x;
u32 blit_dst_y;
u32 blit_dst_width;
u32 blit_dst_height;
u64 blit_du_dx;
u64 blit_dv_dy;
u64 blit_src_x;
u64 blit_src_y;
INSERT_PADDING_WORDS(0x21);
};
std::array<u32, NUM_REGS> reg_array;
};
@@ -122,7 +132,16 @@ private:
ASSERT_REG_POSITION(dst, 0x80);
ASSERT_REG_POSITION(src, 0x8C);
ASSERT_REG_POSITION(operation, 0xAB);
ASSERT_REG_POSITION(trigger, 0xB5);
ASSERT_REG_POSITION(blit_control, 0x223);
ASSERT_REG_POSITION(blit_dst_x, 0x22c);
ASSERT_REG_POSITION(blit_dst_y, 0x22d);
ASSERT_REG_POSITION(blit_dst_width, 0x22e);
ASSERT_REG_POSITION(blit_dst_height, 0x22f);
ASSERT_REG_POSITION(blit_du_dx, 0x230);
ASSERT_REG_POSITION(blit_dv_dy, 0x232);
ASSERT_REG_POSITION(blit_src_x, 0x234);
ASSERT_REG_POSITION(blit_src_y, 0x236);
#undef ASSERT_REG_POSITION
} // namespace Tegra::Engines

View File

@@ -39,16 +39,17 @@ void KeplerMemory::ProcessData(u32 data) {
ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
GPUVAddr address = regs.dest.Address();
VAddr dest_address =
*memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
const GPUVAddr address = regs.dest.Address();
const auto dest_address =
memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
ASSERT_MSG(dest_address, "Invalid GPU address");
// We have to invalidate the destination region to evict any outdated surfaces from the cache.
// We do this before actually writing the new data because the destination address might contain
// a dirty surface that will have to be written back to memory.
rasterizer.InvalidateRegion(dest_address, sizeof(u32));
rasterizer.InvalidateRegion(*dest_address, sizeof(u32));
Memory::Write32(dest_address, data);
Memory::Write32(*dest_address, data);
Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
state.write_offset++;

View File

@@ -273,7 +273,8 @@ void Maxwell3D::ProcessQueryGet() {
GPUVAddr sequence_address = regs.query.QueryAddress();
// Since the sequence address is given as a GPU VAddr, we have to convert it to an application
// VAddr before writing.
std::optional<VAddr> address = memory_manager.GpuToCpuAddress(sequence_address);
const auto address = memory_manager.GpuToCpuAddress(sequence_address);
ASSERT_MSG(address, "Invalid GPU address");
// TODO(Subv): Support the other query units.
ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -386,14 +387,14 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
void Maxwell3D::ProcessCBData(u32 value) {
// Write the input value to the current const buffer at the current position.
GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
ASSERT(buffer_address != 0);
// Don't allow writing past the end of the buffer.
ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
std::optional<VAddr> address =
memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
ASSERT_MSG(address, "Invalid GPU address");
Memory::Write32(*address, value);
dirty_flags.OnMemoryWrite();
@@ -403,10 +404,11 @@ void Maxwell3D::ProcessCBData(u32 value) {
}
Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
GPUVAddr tic_base_address = regs.tic.TICAddress();
const GPUVAddr tic_base_address = regs.tic.TICAddress();
GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
std::optional<VAddr> tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
const GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
const auto tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
ASSERT_MSG(tic_address_cpu, "Invalid GPU address");
Texture::TICEntry tic_entry;
Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
@@ -415,10 +417,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
"TIC versions other than BlockLinear or Pitch are unimplemented");
auto r_type = tic_entry.r_type.Value();
auto g_type = tic_entry.g_type.Value();
auto b_type = tic_entry.b_type.Value();
auto a_type = tic_entry.a_type.Value();
const auto r_type = tic_entry.r_type.Value();
const auto g_type = tic_entry.g_type.Value();
const auto b_type = tic_entry.b_type.Value();
const auto a_type = tic_entry.a_type.Value();
// TODO(Subv): Different data types for separate components are not supported
ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
@@ -427,10 +429,11 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
}
Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
GPUVAddr tsc_base_address = regs.tsc.TSCAddress();
const GPUVAddr tsc_base_address = regs.tsc.TSCAddress();
GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
std::optional<VAddr> tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
const GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
const auto tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
ASSERT_MSG(tsc_address_cpu, "Invalid GPU address");
Texture::TSCEntry tsc_entry;
Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry));
@@ -452,8 +455,10 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
Texture::TextureHandle tex_handle{
Memory::Read32(*memory_manager.GpuToCpuAddress(current_texture))};
const auto address = memory_manager.GpuToCpuAddress(current_texture);
ASSERT_MSG(address, "Invalid GPU address");
const Texture::TextureHandle tex_handle{Memory::Read32(*address)};
Texture::FullTextureInfo tex_info{};
// TODO(Subv): Use the shader to determine which textures are actually accessed.
@@ -462,23 +467,16 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
sizeof(Texture::TextureHandle);
// Load the TIC data.
if (tex_handle.tic_id != 0) {
tex_info.enabled = true;
auto tic_entry = GetTICEntry(tex_handle.tic_id);
// TODO(Subv): Workaround for BitField's move constructor being deleted.
std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
}
auto tic_entry = GetTICEntry(tex_handle.tic_id);
// TODO(Subv): Workaround for BitField's move constructor being deleted.
std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
// Load the TSC data
if (tex_handle.tsc_id != 0) {
auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
// TODO(Subv): Workaround for BitField's move constructor being deleted.
std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
}
auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
// TODO(Subv): Workaround for BitField's move constructor being deleted.
std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
if (tex_info.enabled)
textures.push_back(tex_info);
textures.push_back(tex_info);
}
return textures;
@@ -490,31 +488,28 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
GPUVAddr tex_info_address = tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
const GPUVAddr tex_info_address =
tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
std::optional<VAddr> tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
const auto tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
ASSERT_MSG(tex_address_cpu, "Invalid GPU address");
const Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
Texture::FullTextureInfo tex_info{};
tex_info.index = static_cast<u32>(offset);
// Load the TIC data.
if (tex_handle.tic_id != 0) {
tex_info.enabled = true;
auto tic_entry = GetTICEntry(tex_handle.tic_id);
// TODO(Subv): Workaround for BitField's move constructor being deleted.
std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
}
auto tic_entry = GetTICEntry(tex_handle.tic_id);
// TODO(Subv): Workaround for BitField's move constructor being deleted.
std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
// Load the TSC data
if (tex_handle.tsc_id != 0) {
auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
// TODO(Subv): Workaround for BitField's move constructor being deleted.
std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
}
auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
// TODO(Subv): Workaround for BitField's move constructor being deleted.
std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
return tex_info;
}

View File

@@ -39,8 +39,10 @@ void MaxwellDMA::HandleCopy() {
const GPUVAddr source = regs.src_address.Address();
const GPUVAddr dest = regs.dst_address.Address();
const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source);
const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest);
const auto source_cpu = memory_manager.GpuToCpuAddress(source);
const auto dest_cpu = memory_manager.GpuToCpuAddress(dest);
ASSERT_MSG(source_cpu, "Invalid source GPU address");
ASSERT_MSG(dest_cpu, "Invalid destination GPU address");
// TODO(Subv): Perform more research and implement all features of this engine.
ASSERT(regs.exec.enable_swizzle == 0);
@@ -64,7 +66,7 @@ void MaxwellDMA::HandleCopy() {
// buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
// y_count).
if (!regs.exec.enable_2d) {
Memory::CopyBlock(dest_cpu, source_cpu, regs.x_count);
Memory::CopyBlock(*dest_cpu, *source_cpu, regs.x_count);
return;
}
@@ -73,8 +75,8 @@ void MaxwellDMA::HandleCopy() {
// rectangle. There is no need to manually flush/invalidate the regions because
// CopyBlock does that for us.
for (u32 line = 0; line < regs.y_count; ++line) {
const VAddr source_line = source_cpu + line * regs.src_pitch;
const VAddr dest_line = dest_cpu + line * regs.dst_pitch;
const VAddr source_line = *source_cpu + line * regs.src_pitch;
const VAddr dest_line = *dest_cpu + line * regs.dst_pitch;
Memory::CopyBlock(dest_line, source_line, regs.x_count);
}
return;
@@ -87,12 +89,12 @@ void MaxwellDMA::HandleCopy() {
const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
// TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
// copying.
rasterizer.FlushRegion(source_cpu, src_size);
rasterizer.FlushRegion(*source_cpu, src_size);
// We have to invalidate the destination region to evict any outdated surfaces from the
// cache. We do this before actually writing the new data because the destination address
// might contain a dirty surface that will have to be written back to memory.
rasterizer.InvalidateRegion(dest_cpu, dst_size);
rasterizer.InvalidateRegion(*dest_cpu, dst_size);
};
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
@@ -105,8 +107,8 @@ void MaxwellDMA::HandleCopy() {
copy_size * src_bytes_per_pixel);
Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
regs.src_params.size_x, src_bytes_per_pixel, source_cpu, dest_cpu,
regs.src_params.BlockHeight(), regs.src_params.pos_x,
regs.src_params.size_x, src_bytes_per_pixel, *source_cpu,
*dest_cpu, regs.src_params.BlockHeight(), regs.src_params.pos_x,
regs.src_params.pos_y);
} else {
ASSERT(regs.dst_params.size_z == 1);
@@ -119,7 +121,7 @@ void MaxwellDMA::HandleCopy() {
// If the input is linear and the output is tiled, swizzle the input and copy it over.
Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
src_bpp, dest_cpu, source_cpu, regs.dst_params.BlockHeight());
src_bpp, *dest_cpu, *source_cpu, regs.dst_params.BlockHeight());
}
}

View File

@@ -217,9 +217,9 @@ enum class StoreType : u64 {
Signed8 = 1,
Unsigned16 = 2,
Signed16 = 3,
Bytes32 = 4,
Bytes64 = 5,
Bytes128 = 6,
Bits32 = 4,
Bits64 = 5,
Bits128 = 6,
};
enum class IMinMaxExchange : u64 {
@@ -981,6 +981,10 @@ union Instruction {
}
return false;
}
bool IsComponentEnabled(std::size_t component) const {
return ((1ULL << component) & component_mask) != 0;
}
} txq;
union {
@@ -1248,11 +1252,19 @@ union Instruction {
union {
BitField<20, 14, u64> offset;
BitField<34, 5, u64> index;
u64 GetOffset() const {
return offset * 4;
}
} cbuf34;
union {
BitField<20, 16, s64> offset;
BitField<36, 5, u64> index;
s64 GetOffset() const {
return offset;
}
} cbuf36;
// Unsure about the size of this one.

View File

@@ -154,7 +154,8 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
const VAddr base_addr{PageSlot(gpu_addr)};
if (base_addr == static_cast<u64>(PageStatus::Allocated) ||
base_addr == static_cast<u64>(PageStatus::Unmapped)) {
base_addr == static_cast<u64>(PageStatus::Unmapped) ||
base_addr == static_cast<u64>(PageStatus::Reserved)) {
return {};
}

View File

@@ -4,6 +4,7 @@
#pragma once
#include <atomic>
#include <functional>
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
@@ -45,12 +46,9 @@ public:
/// Attempt to use a faster method to perform a surface copy
virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst) {
return false;
}
/// Attempt to use a faster method to fill a region
virtual bool AccelerateFill(const void* config) {
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const MathUtil::Rectangle<u32>& src_rect,
const MathUtil::Rectangle<u32>& dst_rect) {
return false;
}
@@ -66,5 +64,9 @@ public:
/// Increase/decrease the number of object in pages touching the specified region
virtual void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {}
/// Initialize disk cached resources for the game being emulated
virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
const DiskResourceLoadCallback& callback = {}) {}
};
} // namespace VideoCore

View File

@@ -19,7 +19,8 @@ OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
std::size_t alignment, bool cache) {
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
ASSERT_MSG(cpu_addr, "Invalid GPU address");
// Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games.

View File

@@ -46,7 +46,9 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
ASSERT_MSG(cpu_addr, "Invalid GPU address");
const u8* source{Memory::GetPointer(*cpu_addr)};
for (u32 primitive = 0; primitive < count / 4; ++primitive) {

View File

@@ -22,6 +22,7 @@
#include "core/settings.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
@@ -99,8 +100,9 @@ struct FramebufferCacheKey {
}
};
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
: res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info},
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
ScreenInfo& info)
: res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info},
buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
// Create sampler objects
for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
@@ -297,11 +299,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
MICROPROFILE_SCOPE(OpenGL_Shader);
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
// Next available bindpoints to use when uploading the const buffers and textures to the GLSL
// shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
u32 current_gmem_bindpoint = 0;
u32 current_texture_bindpoint = 0;
BaseBindings base_bindings;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
@@ -325,47 +323,35 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
const GLintptr offset = buffer_cache.UploadHostMemory(
&ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
// Bind the buffer
glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(stage), buffer_cache.GetHandle(),
offset, static_cast<GLsizeiptr>(sizeof(ubo)));
// Bind the emulation info buffer
glBindBufferRange(GL_UNIFORM_BUFFER, base_bindings.cbuf, buffer_cache.GetHandle(), offset,
static_cast<GLsizeiptr>(sizeof(ubo)));
Shader shader{shader_cache.GetStageProgram(program)};
const auto [program_handle, next_bindings] =
shader->GetProgramHandle(primitive_mode, base_bindings);
switch (program) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB: {
shader_program_manager->UseProgrammableVertexShader(
shader->GetProgramHandle(primitive_mode));
case Maxwell::ShaderProgram::VertexB:
shader_program_manager->UseProgrammableVertexShader(program_handle);
break;
}
case Maxwell::ShaderProgram::Geometry: {
shader_program_manager->UseProgrammableGeometryShader(
shader->GetProgramHandle(primitive_mode));
case Maxwell::ShaderProgram::Geometry:
shader_program_manager->UseProgrammableGeometryShader(program_handle);
break;
}
case Maxwell::ShaderProgram::Fragment: {
shader_program_manager->UseProgrammableFragmentShader(
shader->GetProgramHandle(primitive_mode));
case Maxwell::ShaderProgram::Fragment:
shader_program_manager->UseProgrammableFragmentShader(program_handle);
break;
}
default:
LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
shader_config.enable.Value(), shader_config.offset);
UNREACHABLE();
}
// Configure the const buffers for this shader stage.
current_constbuffer_bindpoint =
SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode,
current_constbuffer_bindpoint);
// Configure global memory regions for this shader stage.
current_gmem_bindpoint = SetupGlobalRegions(static_cast<Maxwell::ShaderStage>(stage),
shader, primitive_mode, current_gmem_bindpoint);
// Configure the textures for this shader stage.
current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
primitive_mode, current_texture_bindpoint);
const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
SetupConstBuffers(stage_enum, shader, program_handle, base_bindings);
SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings);
SetupTextures(stage_enum, shader, program_handle, base_bindings);
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
@@ -380,6 +366,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
// VertexB was combined with VertexA, so we skip the VertexB iteration
index++;
}
base_bindings = next_bindings;
}
SyncClipEnabled(clip_distances);
@@ -461,7 +449,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
return boost::make_iterator_range(map.equal_range(interval));
}
void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
void RasterizerOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
const u64 page_start{addr >> Memory::PAGE_BITS};
const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
@@ -491,9 +479,14 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
cached_pages.add({pages_interval, delta});
}
void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool using_color_fb,
bool using_depth_fb, bool preserve_contents,
std::optional<std::size_t> single_color_target) {
void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
shader_cache.LoadDiskCache(stop_loading, callback);
}
std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
std::optional<std::size_t> single_color_target) {
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs;
@@ -505,7 +498,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
// Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
// single color targets). This is done because the guest registers may not change but the
// host framebuffer may contain different attachments
return;
return current_depth_stencil_usage;
}
current_framebuffer_config_state = fb_config_state;
@@ -575,12 +568,14 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
depth_surface->MarkAsModified(true, res_cache);
fbkey.zeta = depth_surface->Texture().handle;
fbkey.stencil_enable = regs.stencil_enable;
fbkey.stencil_enable = regs.stencil_enable &&
depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil;
}
SetupCachedFramebuffer(fbkey, current_state);
SyncViewport(current_state);
return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable};
}
void RasterizerOpenGL::Clear() {
@@ -648,8 +643,8 @@ void RasterizerOpenGL::Clear() {
return;
}
ConfigureFramebuffers(clear_state, use_color, use_depth || use_stencil, false,
regs.clear_buffers.RT.Value());
const auto [clear_depth, clear_stencil] = ConfigureFramebuffers(
clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value());
if (regs.clear_flags.scissor) {
SyncScissorTest(clear_state);
}
@@ -664,11 +659,11 @@ void RasterizerOpenGL::Clear() {
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
}
if (use_depth && use_stencil) {
if (clear_depth && clear_stencil) {
glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
} else if (use_depth) {
} else if (clear_depth) {
glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
} else if (use_stencil) {
} else if (clear_stencil) {
glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
}
}
@@ -783,20 +778,11 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
}
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst) {
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const MathUtil::Rectangle<u32>& src_rect,
const MathUtil::Rectangle<u32>& dst_rect) {
MICROPROFILE_SCOPE(OpenGL_Blits);
if (Settings::values.use_accurate_gpu_emulation) {
// Skip the accelerated copy and perform a slow but more accurate copy
return false;
}
res_cache.FermiCopySurface(src, dst);
return true;
}
bool RasterizerOpenGL::AccelerateFill(const void* config) {
UNREACHABLE();
res_cache.FermiCopySurface(src, dst, src_rect, dst_rect);
return true;
}
@@ -929,8 +915,9 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
}
}
u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_bindpoint) {
void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, GLuint program_handle,
BaseBindings base_bindings) {
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& gpu = Core::System::GetInstance().GPU();
const auto& maxwell3d = gpu.Maxwell3D();
@@ -970,7 +957,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
}
} else {
// Buffer is accessed directly, upload just what we use
size = used_buffer.GetSize() * sizeof(float);
size = used_buffer.GetSize();
}
// Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
@@ -978,92 +965,64 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
size = Common::AlignUp(size, sizeof(GLvec4));
ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
GLintptr const_buffer_offset = buffer_cache.UploadMemory(
const GLintptr const_buffer_offset = buffer_cache.UploadMemory(
buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));
// Now configure the bindpoint of the buffer inside the shader
glUniformBlockBinding(shader->GetProgramHandle(primitive_mode),
shader->GetProgramResourceIndex(used_buffer),
current_bindpoint + bindpoint);
// Prepare values for multibind
bind_buffers[bindpoint] = buffer_cache.GetHandle();
bind_offsets[bindpoint] = const_buffer_offset;
bind_sizes[bindpoint] = size;
}
glBindBuffersRange(GL_UNIFORM_BUFFER, current_bindpoint, static_cast<GLsizei>(entries.size()),
// The first binding is reserved for emulation values
const GLuint ubo_base_binding = base_bindings.cbuf + 1;
glBindBuffersRange(GL_UNIFORM_BUFFER, ubo_base_binding, static_cast<GLsizei>(entries.size()),
bind_buffers.data(), bind_offsets.data(), bind_sizes.data());
return current_bindpoint + static_cast<u32>(entries.size());
}
u32 RasterizerOpenGL::SetupGlobalRegions(Maxwell::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_bindpoint) {
for (const auto& global_region : shader->GetShaderEntries().global_memory_entries) {
const auto& region =
global_cache.GetGlobalRegion(global_region, static_cast<Maxwell::ShaderStage>(stage));
const GLuint block_index{shader->GetProgramResourceIndex(global_region)};
ASSERT(block_index != GL_INVALID_INDEX);
void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, GLenum primitive_mode,
BaseBindings base_bindings) {
// TODO(Rodrigo): Use ARB_multi_bind here
const auto& entries = shader->GetShaderEntries().global_memory_entries;
for (u32 bindpoint = 0; bindpoint < static_cast<u32>(entries.size()); ++bindpoint) {
const auto& entry = entries[bindpoint];
const u32 current_bindpoint = base_bindings.gmem + bindpoint;
const auto& region = global_cache.GetGlobalRegion(entry, stage);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, current_bindpoint, region->GetBufferHandle());
glShaderStorageBlockBinding(shader->GetProgramHandle(primitive_mode), block_index,
current_bindpoint);
++current_bindpoint;
}
return current_bindpoint;
}
u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_unit) {
void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
GLuint program_handle, BaseBindings base_bindings) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& gpu = Core::System::GetInstance().GPU();
const auto& maxwell3d = gpu.Maxwell3D();
const auto& entries = shader->GetShaderEntries().samplers;
ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units),
ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units),
"Exceeded the number of active textures.");
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
const u32 current_bindpoint = current_unit + bindpoint;
// Bind the uniform to the sampler.
glProgramUniform1i(shader->GetProgramHandle(primitive_mode),
shader->GetUniformLocation(entry), current_bindpoint);
const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
if (!texture.enabled) {
state.texture_units[current_bindpoint].texture = 0;
continue;
}
const auto texture = maxwell3d.GetStageTexture(stage, entry.GetOffset());
const u32 current_bindpoint = base_bindings.sampler + bindpoint;
texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
Surface surface = res_cache.GetTextureSurface(texture, entry);
if (surface != nullptr) {
const GLuint handle =
state.texture_units[current_bindpoint].texture =
entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle;
const GLenum target = entry.IsArray() ? surface->TargetLayer() : surface->Target();
state.texture_units[current_bindpoint].texture = handle;
state.texture_units[current_bindpoint].target = target;
state.texture_units[current_bindpoint].swizzle.r =
MaxwellToGL::SwizzleSource(texture.tic.x_source);
state.texture_units[current_bindpoint].swizzle.g =
MaxwellToGL::SwizzleSource(texture.tic.y_source);
state.texture_units[current_bindpoint].swizzle.b =
MaxwellToGL::SwizzleSource(texture.tic.z_source);
state.texture_units[current_bindpoint].swizzle.a =
MaxwellToGL::SwizzleSource(texture.tic.w_source);
surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
texture.tic.w_source);
} else {
// Can occur when texture addr is null or its memory is unmapped/invalid
state.texture_units[current_bindpoint].texture = 0;
}
}
return current_unit + static_cast<u32>(entries.size());
}
void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {

View File

@@ -5,6 +5,7 @@
#pragma once
#include <array>
#include <atomic>
#include <cstddef>
#include <map>
#include <memory>
@@ -33,6 +34,10 @@
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
namespace Core {
class System;
}
namespace Core::Frontend {
class EmuWindow;
}
@@ -45,7 +50,8 @@ struct FramebufferCacheKey;
class RasterizerOpenGL : public VideoCore::RasterizerInterface {
public:
explicit RasterizerOpenGL(Core::Frontend::EmuWindow& renderer, ScreenInfo& info);
explicit RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
ScreenInfo& info);
~RasterizerOpenGL() override;
void DrawArrays() override;
@@ -55,12 +61,15 @@ public:
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst) override;
bool AccelerateFill(const void* config) override;
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const MathUtil::Rectangle<u32>& src_rect,
const MathUtil::Rectangle<u32>& dst_rect) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
bool AccelerateDrawBatch(bool is_indexed) override;
void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override;
void LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
/// Maximum supported size that a constbuffer can have in bytes.
static constexpr std::size_t MaxConstbufferSize = 0x10000;
@@ -122,40 +131,25 @@ private:
* @param using_depth_fb If true, configure the depth/stencil framebuffer.
* @param preserve_contents If true, tries to preserve data from a previously used framebuffer.
* @param single_color_target Specifies if a single color buffer target should be used.
* @returns If depth (first) or stencil (second) are being stored in the bound zeta texture
* (requires using_depth_fb to be true)
*/
void ConfigureFramebuffers(OpenGLState& current_state, bool use_color_fb = true,
bool using_depth_fb = true, bool preserve_contents = true,
std::optional<std::size_t> single_color_target = {});
std::pair<bool, bool> ConfigureFramebuffers(
OpenGLState& current_state, bool use_color_fb = true, bool using_depth_fb = true,
bool preserve_contents = true, std::optional<std::size_t> single_color_target = {});
/**
* Configures the current constbuffers to use for the draw command.
* @param stage The shader stage to configure buffers for.
* @param shader The shader object that contains the specified stage.
* @param current_bindpoint The offset at which to start counting new buffer bindpoints.
* @returns The next available bindpoint for use in the next shader stage.
*/
u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_bindpoint);
/// Configures the current constbuffers to use for the draw command.
void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
GLuint program_handle, BaseBindings base_bindings);
/**
* Configures the current global memory regions to use for the draw command.
* @param stage The shader stage to configure buffers for.
* @param shader The shader object that contains the specified stage.
* @param current_bindpoint The offset at which to start counting new buffer bindpoints.
* @returns The next available bindpoint for use in the next shader stage.
*/
u32 SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_bindpoint);
/// Configures the current global memory entries to use for the draw command.
void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, GLenum primitive_mode,
BaseBindings base_bindings);
/**
* Configures the current textures to use for the draw command.
* @param stage The shader stage to configure textures for.
* @param shader The shader object that contains the specified stage.
* @param current_unit The offset at which to start counting unused texture units.
* @returns The next available bindpoint for use in the next shader stage.
*/
u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_unit);
/// Configures the current textures to use for the draw command.
void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
GLuint program_handle, BaseBindings base_bindings);
/// Syncs the viewport and depth range to match the guest state
void SyncViewport(OpenGLState& current_state);
@@ -231,6 +225,7 @@ private:
std::map<FramebufferCacheKey, OGLFramebuffer> framebuffer_cache;
FramebufferConfigState current_framebuffer_config_state;
std::pair<bool, bool> current_depth_stencil_usage{};
std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers;

View File

@@ -18,7 +18,6 @@
#include "video_core/morton.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/surface.h"
#include "video_core/textures/astc.h"
@@ -44,14 +43,14 @@ struct FormatTuple {
bool compressed;
};
static void ApplyTextureDefaults(GLenum target, u32 max_mip_level) {
glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1);
static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1);
if (max_mip_level == 1) {
glTexParameterf(target, GL_TEXTURE_LOD_BIAS, 1000.0);
glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0);
}
}
@@ -128,6 +127,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
params.unaligned_height = config.tic.Height();
params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
params.identity = SurfaceClass::Uploaded;
switch (params.target) {
case SurfaceTarget::Texture1D:
@@ -167,6 +167,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
}
params.is_layered = SurfaceTargetIsLayered(params.target);
params.is_array = SurfaceTargetIsArray(params.target);
params.max_mip_level = config.tic.max_mip_level + 1;
params.rt = {};
@@ -194,6 +195,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.height = config.height;
params.unaligned_height = config.height;
params.target = SurfaceTarget::Texture2D;
params.identity = SurfaceClass::RenderTarget;
params.depth = 1;
params.max_mip_level = 1;
params.is_layered = false;
@@ -229,6 +231,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.height = zeta_height;
params.unaligned_height = zeta_height;
params.target = SurfaceTarget::Texture2D;
params.identity = SurfaceClass::DepthBuffer;
params.depth = 1;
params.max_mip_level = 1;
params.is_layered = false;
@@ -257,6 +260,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.height = config.height;
params.unaligned_height = config.height;
params.target = SurfaceTarget::Texture2D;
params.identity = SurfaceClass::Copy;
params.depth = 1;
params.max_mip_level = 1;
params.rt = {};
@@ -424,7 +428,8 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
}
}
static void FastCopySurface(const Surface& src_surface, const Surface& dst_surface) {
void RasterizerCacheOpenGL::FastCopySurface(const Surface& src_surface,
const Surface& dst_surface) {
const auto& src_params{src_surface->GetSurfaceParams()};
const auto& dst_params{dst_surface->GetSurfaceParams()};
@@ -434,12 +439,15 @@ static void FastCopySurface(const Surface& src_surface, const Surface& dst_surfa
glCopyImageSubData(src_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, 0,
0, dst_surface->Texture().handle, SurfaceTargetToGL(dst_params.target), 0, 0,
0, 0, width, height, 1);
dst_surface->MarkAsModified(true, *this);
}
MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64));
static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
const GLuint copy_pbo_handle, const GLenum src_attachment = 0,
const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0) {
void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface,
const GLuint copy_pbo_handle, const GLenum src_attachment,
const GLenum dst_attachment,
const std::size_t cubemap_face) {
MICROPROFILE_SCOPE(OpenGL_CopySurface);
ASSERT_MSG(dst_attachment == 0, "Unimplemented");
@@ -519,63 +527,50 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
}
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}
dst_surface->MarkAsModified(true, *this);
}
CachedSurface::CachedSurface(const SurfaceParams& params)
: params(params), gl_target(SurfaceTargetToGL(params.target)),
cached_size_in_bytes(params.size_in_bytes) {
texture.Create();
const auto& rect{params.GetRect()};
texture.Create(gl_target);
// Keep track of previous texture bindings
OpenGLState cur_state = OpenGLState::GetCurState();
const auto& old_tex = cur_state.texture_units[0];
SCOPE_EXIT({
cur_state.texture_units[0] = old_tex;
cur_state.Apply();
});
cur_state.texture_units[0].texture = texture.handle;
cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
cur_state.Apply();
glActiveTexture(GL_TEXTURE0);
// TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
// alternatives. This signals a bug on those functions.
const auto width = static_cast<GLsizei>(params.MipWidth(0));
const auto height = static_cast<GLsizei>(params.MipHeight(0));
const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
gl_internal_format = format_tuple.internal_format;
gl_is_compressed = format_tuple.compressed;
if (!format_tuple.compressed) {
// Only pre-create the texture for non-compressed textures.
switch (params.target) {
case SurfaceTarget::Texture1D:
glTexStorage1D(SurfaceTargetToGL(params.target), params.max_mip_level,
format_tuple.internal_format, rect.GetWidth());
break;
case SurfaceTarget::Texture2D:
case SurfaceTarget::TextureCubemap:
glTexStorage2D(SurfaceTargetToGL(params.target), params.max_mip_level,
format_tuple.internal_format, rect.GetWidth(), rect.GetHeight());
break;
case SurfaceTarget::Texture3D:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
glTexStorage3D(SurfaceTargetToGL(params.target), params.max_mip_level,
format_tuple.internal_format, rect.GetWidth(), rect.GetHeight(),
params.depth);
break;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(params.target));
UNREACHABLE();
glTexStorage2D(GL_TEXTURE_2D, params.max_mip_level, format_tuple.internal_format,
rect.GetWidth(), rect.GetHeight());
}
switch (params.target) {
case SurfaceTarget::Texture1D:
glTextureStorage1D(texture.handle, params.max_mip_level, format_tuple.internal_format,
width);
break;
case SurfaceTarget::Texture2D:
case SurfaceTarget::TextureCubemap:
glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
width, height);
break;
case SurfaceTarget::Texture3D:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
glTextureStorage3D(texture.handle, params.max_mip_level, format_tuple.internal_format,
width, height, params.depth);
break;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(params.target));
UNREACHABLE();
glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
width, height);
}
ApplyTextureDefaults(SurfaceTargetToGL(params.target), params.max_mip_level);
ApplyTextureDefaults(texture.handle, params.max_mip_level);
LabelGLObject(GL_TEXTURE, texture.handle, params.addr,
SurfaceParams::SurfaceTargetName(params.target));
OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, params.IdentityString());
// Clamp size to mapped GPU memory region
// TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
@@ -730,7 +725,6 @@ void CachedSurface::FlushGLBuffer() {
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width,
params.height);
ASSERT(params.type != SurfaceType::Fill);
const u8* const texture_src_data = Memory::GetPointer(params.addr);
ASSERT(texture_src_data);
if (params.is_tiled) {
@@ -748,63 +742,50 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
const auto& rect{params.GetRect(mip_map)};
// Load data from memory to the surface
const GLint x0 = static_cast<GLint>(rect.left);
const GLint y0 = static_cast<GLint>(rect.bottom);
std::size_t buffer_offset =
const auto x0 = static_cast<GLint>(rect.left);
const auto y0 = static_cast<GLint>(rect.bottom);
auto buffer_offset =
static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.MipWidth(mip_map) +
static_cast<std::size_t>(x0)) *
GetBytesPerPixel(params.pixel_format);
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
const GLuint target_tex = texture.handle;
OpenGLState cur_state = OpenGLState::GetCurState();
const auto& old_tex = cur_state.texture_units[0];
SCOPE_EXIT({
cur_state.texture_units[0] = old_tex;
cur_state.Apply();
});
cur_state.texture_units[0].texture = target_tex;
cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
cur_state.Apply();
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT
ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0);
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));
GLsizei image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
glActiveTexture(GL_TEXTURE0);
const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
if (tuple.compressed) {
switch (params.target) {
case SurfaceTarget::Texture2D:
glCompressedTexImage2D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)), 0, image_size,
&gl_buffer[mip_map][buffer_offset]);
glCompressedTextureSubImage2D(
texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format, image_size,
&gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::Texture3D:
glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)),
static_cast<GLsizei>(params.MipDepth(mip_map)), 0, image_size,
&gl_buffer[mip_map][buffer_offset]);
glCompressedTextureSubImage3D(
texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)),
static_cast<GLsizei>(params.MipDepth(mip_map)), tuple.internal_format, image_size,
&gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)),
static_cast<GLsizei>(params.depth), 0, image_size,
&gl_buffer[mip_map][buffer_offset]);
glCompressedTextureSubImage3D(
texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)), static_cast<GLsizei>(params.depth),
tuple.internal_format, image_size, &gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::TextureCubemap: {
GLsizei layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map));
const auto layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map));
for (std::size_t face = 0; face < params.depth; ++face) {
glCompressedTexImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face),
mip_map, tuple.internal_format,
static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)), 0,
layer_size, &gl_buffer[mip_map][buffer_offset]);
glCompressedTextureSubImage3D(
texture.handle, mip_map, 0, 0, static_cast<GLint>(face),
static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)), 1, tuple.internal_format,
layer_size, &gl_buffer[mip_map][buffer_offset]);
buffer_offset += layer_size;
}
break;
@@ -813,46 +794,43 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(params.target));
UNREACHABLE();
glCompressedTexImage2D(GL_TEXTURE_2D, mip_map, tuple.internal_format,
static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)), 0,
static_cast<GLsizei>(params.size_in_bytes_gl),
&gl_buffer[mip_map][buffer_offset]);
glCompressedTextureSubImage2D(
texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format,
static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[mip_map][buffer_offset]);
}
} else {
switch (params.target) {
case SurfaceTarget::Texture1D:
glTexSubImage1D(SurfaceTargetToGL(params.target), mip_map, x0,
static_cast<GLsizei>(rect.GetWidth()), tuple.format, tuple.type,
&gl_buffer[mip_map][buffer_offset]);
glTextureSubImage1D(texture.handle, mip_map, x0, static_cast<GLsizei>(rect.GetWidth()),
tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::Texture2D:
glTexSubImage2D(SurfaceTargetToGL(params.target), mip_map, x0, y0,
static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
&gl_buffer[mip_map][buffer_offset]);
glTextureSubImage2D(texture.handle, mip_map, x0, y0,
static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
&gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::Texture3D:
glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0,
static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map),
tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map),
tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0,
static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
tuple.type, &gl_buffer[mip_map][buffer_offset]);
glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
tuple.type, &gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::TextureCubemap: {
std::size_t start = buffer_offset;
for (std::size_t face = 0; face < params.depth; ++face) {
glTexSubImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), mip_map,
x0, y0, static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
&gl_buffer[mip_map][buffer_offset]);
glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face),
static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), 1, tuple.format,
tuple.type, &gl_buffer[mip_map][buffer_offset]);
buffer_offset += params.LayerSizeGL(mip_map);
}
break;
@@ -861,9 +839,10 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(params.target));
UNREACHABLE();
glTexSubImage2D(GL_TEXTURE_2D, mip_map, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
&gl_buffer[mip_map][buffer_offset]);
glTextureSubImage2D(texture.handle, mip_map, x0, y0,
static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
&gl_buffer[mip_map][buffer_offset]);
}
}
@@ -873,39 +852,47 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
void CachedSurface::EnsureTextureView() {
if (texture_view.handle != 0)
return;
// Compressed texture are not being created with immutable storage
UNIMPLEMENTED_IF(gl_is_compressed);
const GLenum target{TargetLayer()};
const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u};
constexpr GLuint min_layer = 0;
constexpr GLuint min_level = 0;
texture_view.Create();
glGenTextures(1, &texture_view.handle);
glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, 0,
params.max_mip_level, 0, 1);
OpenGLState cur_state = OpenGLState::GetCurState();
const auto& old_tex = cur_state.texture_units[0];
SCOPE_EXIT({
cur_state.texture_units[0] = old_tex;
cur_state.Apply();
});
cur_state.texture_units[0].texture = texture_view.handle;
cur_state.texture_units[0].target = target;
cur_state.Apply();
ApplyTextureDefaults(target, params.max_mip_level);
ApplyTextureDefaults(texture_view.handle, params.max_mip_level);
glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA,
reinterpret_cast<const GLint*>(swizzle.data()));
}
MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
if (params.type == SurfaceType::Fill)
return;
MICROPROFILE_SCOPE(OpenGL_TextureUL);
for (u32 i = 0; i < params.max_mip_level; i++)
UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle);
}
void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
Tegra::Texture::SwizzleSource swizzle_y,
Tegra::Texture::SwizzleSource swizzle_z,
Tegra::Texture::SwizzleSource swizzle_w) {
const GLenum new_x = MaxwellToGL::SwizzleSource(swizzle_x);
const GLenum new_y = MaxwellToGL::SwizzleSource(swizzle_y);
const GLenum new_z = MaxwellToGL::SwizzleSource(swizzle_z);
const GLenum new_w = MaxwellToGL::SwizzleSource(swizzle_w);
if (swizzle[0] == new_x && swizzle[1] == new_y && swizzle[2] == new_z && swizzle[3] == new_w) {
return;
}
swizzle = {new_x, new_y, new_z, new_w};
const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data());
glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
if (texture_view.handle != 0) {
glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
}
}
RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer)
: RasterizerCache{rasterizer} {
read_framebuffer.Create();
@@ -1038,26 +1025,161 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
}
address += layer_size;
}
dst_surface->MarkAsModified(true, *this);
}
static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
const MathUtil::Rectangle<u32>& src_rect,
const MathUtil::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0,
std::size_t cubemap_face = 0) {
const auto& src_params{src_surface->GetSurfaceParams()};
const auto& dst_params{dst_surface->GetSurfaceParams()};
OpenGLState prev_state{OpenGLState::GetCurState()};
SCOPE_EXIT({ prev_state.Apply(); });
OpenGLState state;
state.draw.read_framebuffer = read_fb_handle;
state.draw.draw_framebuffer = draw_fb_handle;
state.Apply();
u32 buffers{};
if (src_params.type == SurfaceType::ColorTexture) {
switch (src_params.target) {
case SurfaceTarget::Texture2D:
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
GL_TEXTURE_2D, src_surface->Texture().handle, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
0, 0);
break;
case SurfaceTarget::TextureCubemap:
glFramebufferTexture2D(
GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
src_surface->Texture().handle, 0);
glFramebufferTexture2D(
GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
break;
case SurfaceTarget::Texture2DArray:
glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
src_surface->Texture().handle, 0, 0);
glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
break;
case SurfaceTarget::Texture3D:
glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
SurfaceTargetToGL(src_params.target),
src_surface->Texture().handle, 0, 0);
glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
SurfaceTargetToGL(src_params.target), 0, 0, 0);
break;
default:
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
GL_TEXTURE_2D, src_surface->Texture().handle, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
0, 0);
break;
}
switch (dst_params.target) {
case SurfaceTarget::Texture2D:
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
0, 0);
break;
case SurfaceTarget::TextureCubemap:
glFramebufferTexture2D(
GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
dst_surface->Texture().handle, 0);
glFramebufferTexture2D(
GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
break;
case SurfaceTarget::Texture2DArray:
glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
dst_surface->Texture().handle, 0, 0);
glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
break;
case SurfaceTarget::Texture3D:
glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
SurfaceTargetToGL(dst_params.target),
dst_surface->Texture().handle, 0, 0);
glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
SurfaceTargetToGL(dst_params.target), 0, 0, 0);
break;
default:
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
0, 0);
break;
}
buffers = GL_COLOR_BUFFER_BIT;
} else if (src_params.type == SurfaceType::Depth) {
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
src_surface->Texture().handle, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
dst_surface->Texture().handle, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
buffers = GL_DEPTH_BUFFER_BIT;
} else if (src_params.type == SurfaceType::DepthStencil) {
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
src_surface->Texture().handle, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
dst_surface->Texture().handle, 0);
buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
}
glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
return true;
}
void RasterizerCacheOpenGL::FermiCopySurface(
const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config) {
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
const MathUtil::Rectangle<u32>& src_rect, const MathUtil::Rectangle<u32>& dst_rect) {
const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
ASSERT(src_params.width == dst_params.width);
ASSERT(src_params.height == dst_params.height);
ASSERT(src_params.pixel_format == dst_params.pixel_format);
ASSERT(src_params.block_height == dst_params.block_height);
ASSERT(src_params.is_tiled == dst_params.is_tiled);
ASSERT(src_params.depth == dst_params.depth);
ASSERT(src_params.depth == 1); // Currently, FastCopySurface only works with 2D surfaces
ASSERT(src_params.target == dst_params.target);
ASSERT(src_params.rt.index == dst_params.rt.index);
FastCopySurface(GetSurface(src_params, true), GetSurface(dst_params, false));
auto src_surface = GetSurface(src_params, true);
auto dst_surface = GetSurface(dst_params, true);
BlitSurface(src_surface, dst_surface, src_rect, dst_rect, read_framebuffer.handle,
draw_framebuffer.handle);
dst_surface->MarkAsModified(true, *this);
}
void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,

View File

@@ -35,6 +35,14 @@ using PixelFormat = VideoCore::Surface::PixelFormat;
using ComponentType = VideoCore::Surface::ComponentType;
struct SurfaceParams {
enum class SurfaceClass {
Uploaded,
RenderTarget,
DepthBuffer,
Copy,
};
static std::string SurfaceTargetName(SurfaceTarget target) {
switch (target) {
case SurfaceTarget::Texture1D:
@@ -210,6 +218,48 @@ struct SurfaceParams {
/// Initializes parameters for caching, should be called after everything has been initialized
void InitCacheParameters(Tegra::GPUVAddr gpu_addr);
std::string TargetName() const {
switch (target) {
case SurfaceTarget::Texture1D:
return "1D";
case SurfaceTarget::Texture2D:
return "2D";
case SurfaceTarget::Texture3D:
return "3D";
case SurfaceTarget::Texture1DArray:
return "1DArray";
case SurfaceTarget::Texture2DArray:
return "2DArray";
case SurfaceTarget::TextureCubemap:
return "Cube";
default:
LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
UNREACHABLE();
return fmt::format("TUK({})", static_cast<u32>(target));
}
}
std::string ClassName() const {
switch (identity) {
case SurfaceClass::Uploaded:
return "UP";
case SurfaceClass::RenderTarget:
return "RT";
case SurfaceClass::DepthBuffer:
return "DB";
case SurfaceClass::Copy:
return "CP";
default:
LOG_CRITICAL(HW_GPU, "Unimplemented surface_class={}", static_cast<u32>(identity));
UNREACHABLE();
return fmt::format("CUK({})", static_cast<u32>(identity));
}
}
std::string IdentityString() const {
return ClassName() + '_' + TargetName() + '_' + (is_tiled ? 'T' : 'L');
}
bool is_tiled;
u32 block_width;
u32 block_height;
@@ -223,8 +273,10 @@ struct SurfaceParams {
u32 depth;
u32 unaligned_height;
SurfaceTarget target;
SurfaceClass identity;
u32 max_mip_level;
bool is_layered;
bool is_array;
bool srgb_conversion;
// Parameters used for caching
VAddr addr;
@@ -255,6 +307,7 @@ struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) {
SurfaceReserveKey res;
res.state = params;
res.state.identity = {}; // Ignore the origin of the texture
res.state.gpu_addr = {}; // Ignore GPU vaddr in caching
res.state.rt = {}; // Ignore rt config in caching
return res;
@@ -294,7 +347,7 @@ public:
}
const OGLTexture& TextureLayer() {
if (params.is_layered) {
if (params.is_array) {
return Texture();
}
EnsureTextureView();
@@ -329,6 +382,11 @@ public:
// Upload data in gl_buffer to this surface's texture
void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
Tegra::Texture::SwizzleSource swizzle_y,
Tegra::Texture::SwizzleSource swizzle_z,
Tegra::Texture::SwizzleSource swizzle_w);
private:
void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
@@ -340,8 +398,8 @@ private:
SurfaceParams params{};
GLenum gl_target{};
GLenum gl_internal_format{};
bool gl_is_compressed{};
std::size_t cached_size_in_bytes{};
std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
};
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -363,7 +421,9 @@ public:
/// Copies the contents of one surface to another
void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config);
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
const MathUtil::Rectangle<u32>& src_rect,
const MathUtil::Rectangle<u32>& dst_rect);
private:
void LoadSurface(const Surface& surface);
@@ -384,6 +444,10 @@ private:
/// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
void FastCopySurface(const Surface& src_surface, const Surface& dst_surface);
void CopySurface(const Surface& src_surface, const Surface& dst_surface,
const GLuint copy_pbo_handle, const GLenum src_attachment = 0,
const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0);
/// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
/// previously been used. This is to prevent surfaces from being constantly created and

View File

@@ -15,12 +15,12 @@ MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_R
namespace OpenGL {
void OGLTexture::Create() {
void OGLTexture::Create(GLenum target) {
if (handle != 0)
return;
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
glGenTextures(1, &handle);
glCreateTextures(target, 1, &handle);
}
void OGLTexture::Release() {
@@ -71,7 +71,8 @@ void OGLShader::Release() {
}
void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader,
const char* frag_shader, bool separable_program) {
const char* frag_shader, bool separable_program,
bool hint_retrievable) {
OGLShader vert, geo, frag;
if (vert_shader)
vert.Create(vert_shader, GL_VERTEX_SHADER);
@@ -81,7 +82,7 @@ void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shade
frag.Create(frag_shader, GL_FRAGMENT_SHADER);
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
Create(separable_program, vert.handle, geo.handle, frag.handle);
Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle);
}
void OGLProgram::Release() {

View File

@@ -28,7 +28,7 @@ public:
}
/// Creates a new internal OpenGL resource and stores the handle
void Create();
void Create(GLenum target);
/// Deletes the internal OpenGL resource
void Release();
@@ -101,15 +101,15 @@ public:
}
template <typename... T>
void Create(bool separable_program, T... shaders) {
void Create(bool separable_program, bool hint_retrievable, T... shaders) {
if (handle != 0)
return;
handle = GLShader::LoadProgram(separable_program, shaders...);
handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...);
}
/// Creates a new internal OpenGL resource and stores the handle
void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
bool separable_program = false);
bool separable_program = false, bool hint_retrievable = false);
/// Deletes the internal OpenGL resource
void Release();

View File

@@ -11,6 +11,7 @@
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/shader/shader_ir.h"
@@ -19,189 +20,466 @@ namespace OpenGL {
using VideoCommon::Shader::ProgramCode;
// One UBO is always reserved for emulation values
constexpr u32 RESERVED_UBOS = 1;
struct UnspecializedShader {
std::string code;
GLShader::ShaderEntries entries;
Maxwell::ShaderProgram program_type;
};
namespace {
/// Gets the address for the specified shader stage program
static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)];
return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
shader_config.offset);
const auto address = gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
shader_config.offset);
ASSERT_MSG(address, "Invalid GPU address");
return *address;
}
/// Gets the shader program code from memory for the specified address
static ProgramCode GetShaderCode(VAddr addr) {
ProgramCode GetShaderCode(VAddr addr) {
ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64));
return program_code;
}
/// Helper function to set shader uniform block bindings for a single shader stage
static void SetShaderUniformBlockBinding(GLuint shader, const char* name,
Maxwell::ShaderStage binding, std::size_t expected_size) {
const GLuint ub_index = glGetUniformBlockIndex(shader, name);
if (ub_index == GL_INVALID_INDEX) {
return;
}
GLint ub_size = 0;
glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
ASSERT_MSG(static_cast<std::size_t>(ub_size) == expected_size,
"Uniform block size did not match! Got {}, expected {}", ub_size, expected_size);
glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
}
/// Sets shader uniform block bindings for an entire shader program
static void SetShaderUniformBlockBindings(GLuint shader) {
SetShaderUniformBlockBinding(shader, "vs_config", Maxwell::ShaderStage::Vertex,
sizeof(GLShader::MaxwellUniformData));
SetShaderUniformBlockBinding(shader, "gs_config", Maxwell::ShaderStage::Geometry,
sizeof(GLShader::MaxwellUniformData));
SetShaderUniformBlockBinding(shader, "fs_config", Maxwell::ShaderStage::Fragment,
sizeof(GLShader::MaxwellUniformData));
}
CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
: addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} {
GLShader::ProgramResult program_result;
GLenum gl_type{};
/// Gets the shader type from a Maxwell program type
constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) {
switch (program_type) {
case Maxwell::ShaderProgram::VertexA:
// VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
// Conventional HW does not support this, so we combine VertexA and VertexB into one
// stage here.
setup.SetProgramB(GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
case Maxwell::ShaderProgram::VertexB:
CalculateProperties();
program_result = GLShader::GenerateVertexShader(setup);
gl_type = GL_VERTEX_SHADER;
break;
return GL_VERTEX_SHADER;
case Maxwell::ShaderProgram::Geometry:
CalculateProperties();
program_result = GLShader::GenerateGeometryShader(setup);
gl_type = GL_GEOMETRY_SHADER;
break;
return GL_GEOMETRY_SHADER;
case Maxwell::ShaderProgram::Fragment:
CalculateProperties();
program_result = GLShader::GenerateFragmentShader(setup);
gl_type = GL_FRAGMENT_SHADER;
break;
return GL_FRAGMENT_SHADER;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type));
UNREACHABLE();
return;
}
entries = program_result.second;
shader_length = entries.shader_length;
if (program_type != Maxwell::ShaderProgram::Geometry) {
OGLShader shader;
shader.Create(program_result.first.c_str(), gl_type);
program.Create(true, shader.handle);
SetShaderUniformBlockBindings(program.handle);
LabelGLObject(GL_PROGRAM, program.handle, addr);
} else {
// Store shader's code to lazily build it on draw
geometry_programs.code = program_result.first;
return GL_NONE;
}
}
GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) {
const auto search{cbuf_resource_cache.find(buffer.GetHash())};
if (search == cbuf_resource_cache.end()) {
const GLuint index{
glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())};
cbuf_resource_cache[buffer.GetHash()] = index;
return index;
}
return search->second;
}
GLuint CachedShader::GetProgramResourceIndex(const GLShader::GlobalMemoryEntry& global_mem) {
const auto search{gmem_resource_cache.find(global_mem.GetHash())};
if (search == gmem_resource_cache.end()) {
const GLuint index{glGetProgramResourceIndex(program.handle, GL_SHADER_STORAGE_BLOCK,
global_mem.GetName().c_str())};
gmem_resource_cache[global_mem.GetHash()] = index;
return index;
}
return search->second;
}
GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) {
const auto search{uniform_cache.find(sampler.GetHash())};
if (search == uniform_cache.end()) {
const GLint index{glGetUniformLocation(program.handle, sampler.GetName().c_str())};
uniform_cache[sampler.GetHash()] = index;
return index;
}
return search->second;
}
GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program,
const std::string& glsl_topology, u32 max_vertices,
const std::string& debug_name) {
if (target_program.handle != 0) {
return target_program.handle;
}
std::string source = "#version 430 core\n";
source += "layout (" + glsl_topology + ") in;\n";
source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
source += geometry_programs.code;
OGLShader shader;
shader.Create(source.c_str(), GL_GEOMETRY_SHADER);
target_program.Create(true, shader.handle);
SetShaderUniformBlockBindings(target_program.handle);
LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name);
return target_program.handle;
};
static bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
// sched instructions appear once every 4 instructions.
static constexpr std::size_t SchedPeriod = 4;
/// Gets if the current instruction offset is a scheduler instruction
constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
// Sched instructions appear once every 4 instructions.
constexpr std::size_t SchedPeriod = 4;
const std::size_t absolute_offset = offset - main_offset;
return (absolute_offset % SchedPeriod) == 0;
}
static std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
/// Describes primitive behavior on geometry shaders
constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
switch (primitive_mode) {
case GL_POINTS:
return {"points", "Points", 1};
case GL_LINES:
case GL_LINE_STRIP:
return {"lines", "Lines", 2};
case GL_LINES_ADJACENCY:
case GL_LINE_STRIP_ADJACENCY:
return {"lines_adjacency", "LinesAdj", 4};
case GL_TRIANGLES:
case GL_TRIANGLE_STRIP:
case GL_TRIANGLE_FAN:
return {"triangles", "Triangles", 3};
case GL_TRIANGLES_ADJACENCY:
case GL_TRIANGLE_STRIP_ADJACENCY:
return {"triangles_adjacency", "TrianglesAdj", 6};
default:
return {"points", "Invalid", 1};
}
}
/// Calculates the size of a program stream
std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
constexpr std::size_t start_offset = 10;
std::size_t offset = start_offset;
std::size_t size = start_offset * sizeof(u64);
while (offset < program.size()) {
const u64 inst = program[offset];
const u64 instruction = program[offset];
if (!IsSchedInstruction(offset, start_offset)) {
if (inst == 0 || (inst >> 52) == 0x50b) {
if (instruction == 0 || (instruction >> 52) == 0x50b) {
// End on Maxwell's "nop" instruction
break;
}
}
size += sizeof(inst);
size += sizeof(u64);
offset++;
}
return size;
// The last instruction is included in the program size
return std::min(size + sizeof(u64), program.size() * sizeof(u64));
}
void CachedShader::CalculateProperties() {
setup.program.real_size = CalculateProgramSize(setup.program.code);
setup.program.real_size_b = 0;
setup.program.unique_identifier = Common::CityHash64(
reinterpret_cast<const char*>(setup.program.code.data()), setup.program.real_size);
/// Hashes one (or two) program streams
u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code,
const ProgramCode& code_b) {
u64 unique_identifier =
Common::CityHash64(reinterpret_cast<const char*>(code.data()), CalculateProgramSize(code));
if (program_type != Maxwell::ShaderProgram::VertexA) {
return unique_identifier;
}
// VertexA programs include two programs
std::size_t seed = 0;
boost::hash_combine(seed, unique_identifier);
const u64 identifier_b = Common::CityHash64(reinterpret_cast<const char*>(code_b.data()),
CalculateProgramSize(code_b));
boost::hash_combine(seed, identifier_b);
return static_cast<u64>(seed);
}
/// Creates an unspecialized program from code streams
GLShader::ProgramResult CreateProgram(Maxwell::ShaderProgram program_type, ProgramCode program_code,
ProgramCode program_code_b) {
GLShader::ShaderSetup setup(program_code);
if (program_type == Maxwell::ShaderProgram::VertexA) {
std::size_t seed = 0;
boost::hash_combine(seed, setup.program.unique_identifier);
setup.program.real_size_b = CalculateProgramSize(setup.program.code_b);
const u64 identifier_b = Common::CityHash64(
reinterpret_cast<const char*>(setup.program.code_b.data()), setup.program.real_size_b);
boost::hash_combine(seed, identifier_b);
setup.program.unique_identifier = static_cast<u64>(seed);
// VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
// Conventional HW does not support this, so we combine VertexA and VertexB into one
// stage here.
setup.SetProgramB(program_code_b);
}
setup.program.unique_identifier =
GetUniqueIdentifier(program_type, program_code, program_code_b);
switch (program_type) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB:
return GLShader::GenerateVertexShader(setup);
case Maxwell::ShaderProgram::Geometry:
return GLShader::GenerateGeometryShader(setup);
case Maxwell::ShaderProgram::Fragment:
return GLShader::GenerateFragmentShader(setup);
default:
LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type));
UNREACHABLE();
return {};
}
}
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer) : RasterizerCache{rasterizer} {}
CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
Maxwell::ShaderProgram program_type, BaseBindings base_bindings,
GLenum primitive_mode, bool hint_retrievable = false) {
std::string source = "#version 430 core\n";
source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
for (const auto& cbuf : entries.const_buffers) {
source +=
fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++);
}
for (const auto& gmem : entries.global_memory_entries) {
source += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(),
gmem.GetCbufOffset(), base_bindings.gmem++);
}
for (const auto& sampler : entries.samplers) {
source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
base_bindings.sampler++);
}
if (program_type == Maxwell::ShaderProgram::Geometry) {
const auto [glsl_topology, debug_name, max_vertices] =
GetPrimitiveDescription(primitive_mode);
source += "layout (" + std::string(glsl_topology) + ") in;\n";
source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
}
source += code;
OGLShader shader;
shader.Create(source.c_str(), GetShaderType(program_type));
auto program = std::make_shared<OGLProgram>();
program->Create(true, hint_retrievable, shader.handle);
return program;
}
std::set<GLenum> GetSupportedFormats() {
std::set<GLenum> supported_formats;
GLint num_formats{};
glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
std::vector<GLint> formats(num_formats);
glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
for (const GLint format : formats)
supported_formats.insert(static_cast<GLenum>(format));
return supported_formats;
}
} // namespace
CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs,
ProgramCode&& program_code, ProgramCode&& program_code_b)
: addr{addr}, unique_identifier{unique_identifier}, program_type{program_type},
disk_cache{disk_cache}, precompiled_programs{precompiled_programs} {
const std::size_t code_size = CalculateProgramSize(program_code);
const std::size_t code_size_b =
program_code_b.empty() ? 0 : CalculateProgramSize(program_code_b);
GLShader::ProgramResult program_result =
CreateProgram(program_type, program_code, program_code_b);
if (program_result.first.empty()) {
// TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
return;
}
code = program_result.first;
entries = program_result.second;
shader_length = entries.shader_length;
const ShaderDiskCacheRaw raw(unique_identifier, program_type,
static_cast<u32>(code_size / sizeof(u64)),
static_cast<u32>(code_size_b / sizeof(u64)),
std::move(program_code), std::move(program_code_b));
disk_cache.SaveRaw(raw);
}
CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs,
GLShader::ProgramResult result)
: addr{addr}, unique_identifier{unique_identifier}, program_type{program_type},
disk_cache{disk_cache}, precompiled_programs{precompiled_programs} {
code = std::move(result.first);
entries = result.second;
shader_length = entries.shader_length;
}
std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode,
BaseBindings base_bindings) {
GLuint handle{};
if (program_type == Maxwell::ShaderProgram::Geometry) {
handle = GetGeometryShader(primitive_mode, base_bindings);
} else {
const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings);
auto& program = entry->second;
if (is_cache_miss) {
program = TryLoadProgram(primitive_mode, base_bindings);
if (!program) {
program =
SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
}
LabelGLObject(GL_PROGRAM, program->handle, addr);
}
handle = program->handle;
}
base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS;
base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
base_bindings.sampler += static_cast<u32>(entries.samplers.size());
return {handle, base_bindings};
}
GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) {
const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings);
auto& programs = entry->second;
switch (primitive_mode) {
case GL_POINTS:
return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
case GL_LINES:
case GL_LINE_STRIP:
return LazyGeometryProgram(programs.lines, base_bindings, primitive_mode);
case GL_LINES_ADJACENCY:
case GL_LINE_STRIP_ADJACENCY:
return LazyGeometryProgram(programs.lines_adjacency, base_bindings, primitive_mode);
case GL_TRIANGLES:
case GL_TRIANGLE_STRIP:
case GL_TRIANGLE_FAN:
return LazyGeometryProgram(programs.triangles, base_bindings, primitive_mode);
case GL_TRIANGLES_ADJACENCY:
case GL_TRIANGLE_STRIP_ADJACENCY:
return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, primitive_mode);
default:
UNREACHABLE_MSG("Unknown primitive mode.");
return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
}
}
GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
GLenum primitive_mode) {
if (target_program) {
return target_program->handle;
}
const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(primitive_mode);
target_program = TryLoadProgram(primitive_mode, base_bindings);
if (!target_program) {
target_program =
SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
}
LabelGLObject(GL_PROGRAM, target_program->handle, addr, debug_name);
return target_program->handle;
};
CachedProgram CachedShader::TryLoadProgram(GLenum primitive_mode,
BaseBindings base_bindings) const {
const auto found = precompiled_programs.find(GetUsage(primitive_mode, base_bindings));
if (found == precompiled_programs.end()) {
return {};
}
return found->second;
}
ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
BaseBindings base_bindings) const {
return {unique_identifier, base_bindings, primitive_mode};
}
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system)
: RasterizerCache{rasterizer}, disk_cache{system} {}
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
const auto transferable = disk_cache.LoadTransferable();
if (!transferable) {
return;
}
const auto [raws, usages] = *transferable;
auto [decompiled, dumps] = disk_cache.LoadPrecompiled();
const auto supported_formats{GetSupportedFormats()};
const auto unspecialized{
GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)};
if (stop_loading)
return;
// Build shaders
if (callback)
callback(VideoCore::LoadCallbackStage::Build, 0, usages.size());
for (std::size_t i = 0; i < usages.size(); ++i) {
if (stop_loading)
return;
const auto& usage{usages[i]};
LOG_INFO(Render_OpenGL, "Building shader {:016x} ({} of {})", usage.unique_identifier,
i + 1, usages.size());
const auto& unspec{unspecialized.at(usage.unique_identifier)};
const auto dump_it = dumps.find(usage);
CachedProgram shader;
if (dump_it != dumps.end()) {
// If the shader is dumped, attempt to load it with
shader = GeneratePrecompiledProgram(dump_it->second, supported_formats);
if (!shader) {
// Invalidate the precompiled cache if a shader dumped shader was rejected
disk_cache.InvalidatePrecompiled();
dumps.clear();
}
}
if (!shader) {
shader = SpecializeShader(unspec.code, unspec.entries, unspec.program_type,
usage.bindings, usage.primitive, true);
}
precompiled_programs.insert({usage, std::move(shader)});
if (callback)
callback(VideoCore::LoadCallbackStage::Build, i + 1, usages.size());
}
// TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before
// precompiling them
for (std::size_t i = 0; i < usages.size(); ++i) {
const auto& usage{usages[i]};
if (dumps.find(usage) == dumps.end()) {
const auto& program = precompiled_programs.at(usage);
disk_cache.SaveDump(usage, program->handle);
}
}
}
CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats) {
if (supported_formats.find(dump.binary_format) == supported_formats.end()) {
LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
return {};
}
CachedProgram shader = std::make_shared<OGLProgram>();
shader->handle = glCreateProgram();
glProgramParameteri(shader->handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
glProgramBinary(shader->handle, dump.binary_format, dump.binary.data(),
static_cast<GLsizei>(dump.binary.size()));
GLint link_status{};
glGetProgramiv(shader->handle, GL_LINK_STATUS, &link_status);
if (link_status == GL_FALSE) {
LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing");
return {};
}
return shader;
}
std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecializedShaders(
const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
const std::vector<ShaderDiskCacheRaw>& raws,
const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) {
std::unordered_map<u64, UnspecializedShader> unspecialized;
if (callback)
callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
for (std::size_t i = 0; i < raws.size(); ++i) {
if (stop_loading)
return {};
const auto& raw{raws[i]};
const u64 unique_identifier = raw.GetUniqueIdentifier();
const u64 calculated_hash =
GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB());
if (unique_identifier != calculated_hash) {
LOG_ERROR(
Render_OpenGL,
"Invalid hash in entry={:016x} (obtained hash={:016x}) - removing shader cache",
raw.GetUniqueIdentifier(), calculated_hash);
disk_cache.InvalidateTransferable();
return {};
}
GLShader::ProgramResult result;
if (const auto it = decompiled.find(unique_identifier); it != decompiled.end()) {
// If it's stored in the precompiled file, avoid decompiling it here
const auto& stored_decompiled{it->second};
result = {stored_decompiled.code, stored_decompiled.entries};
} else {
// Otherwise decompile the shader at boot and save the result to the decompiled file
result =
CreateProgram(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB());
disk_cache.SaveDecompiled(unique_identifier, result.first, result.second);
}
precompiled_shaders.insert({unique_identifier, result});
unspecialized.insert(
{raw.GetUniqueIdentifier(),
{std::move(result.first), std::move(result.second), raw.GetProgramType()}});
if (callback)
callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size());
}
return unspecialized;
}
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) {
@@ -215,7 +493,23 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
if (!shader) {
// No shader found - create a new one
shader = std::make_shared<CachedShader>(program_addr, program);
ProgramCode program_code = GetShaderCode(program_addr);
ProgramCode program_code_b;
if (program == Maxwell::ShaderProgram::VertexA) {
program_code_b = GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB));
}
const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
const auto found = precompiled_shaders.find(unique_identifier);
if (found != precompiled_shaders.end()) {
shader =
std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache,
precompiled_programs, found->second);
} else {
shader = std::make_shared<CachedShader>(
program_addr, unique_identifier, program, disk_cache, precompiled_programs,
std::move(program_code), std::move(program_code_b));
}
Register(shader);
}

View File

@@ -5,27 +5,49 @@
#pragma once
#include <array>
#include <map>
#include <memory>
#include <set>
#include <tuple>
#include <unordered_map>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
namespace Core {
class System;
} // namespace Core
namespace OpenGL {
class CachedShader;
class RasterizerOpenGL;
struct UnspecializedShader;
using Shader = std::shared_ptr<CachedShader>;
using CachedProgram = std::shared_ptr<OGLProgram>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>;
using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
class CachedShader final : public RasterizerCacheObject {
public:
CachedShader(VAddr addr, Maxwell::ShaderProgram program_type);
explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs,
ProgramCode&& program_code, ProgramCode&& program_code_b);
explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
ShaderDiskCacheOpenGL& disk_cache,
const PrecompiledPrograms& precompiled_programs,
GLShader::ProgramResult result);
VAddr GetAddr() const override {
return addr;
@@ -44,86 +66,75 @@ public:
}
/// Gets the GL program handle for the shader
GLuint GetProgramHandle(GLenum primitive_mode) {
if (program_type != Maxwell::ShaderProgram::Geometry) {
return program.handle;
}
switch (primitive_mode) {
case GL_POINTS:
return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints");
case GL_LINES:
case GL_LINE_STRIP:
return LazyGeometryProgram(geometry_programs.lines, "lines", 2, "ShaderLines");
case GL_LINES_ADJACENCY:
case GL_LINE_STRIP_ADJACENCY:
return LazyGeometryProgram(geometry_programs.lines_adjacency, "lines_adjacency", 4,
"ShaderLinesAdjacency");
case GL_TRIANGLES:
case GL_TRIANGLE_STRIP:
case GL_TRIANGLE_FAN:
return LazyGeometryProgram(geometry_programs.triangles, "triangles", 3,
"ShaderTriangles");
case GL_TRIANGLES_ADJACENCY:
case GL_TRIANGLE_STRIP_ADJACENCY:
return LazyGeometryProgram(geometry_programs.triangles_adjacency, "triangles_adjacency",
6, "ShaderTrianglesAdjacency");
default:
UNREACHABLE_MSG("Unknown primitive mode.");
return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints");
}
}
/// Gets the GL program resource location for the specified resource, caching as needed
GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer);
/// Gets the GL program resource location for the specified resource, caching as needed
GLuint GetProgramResourceIndex(const GLShader::GlobalMemoryEntry& global_mem);
/// Gets the GL uniform location for the specified resource, caching as needed
GLint GetUniformLocation(const GLShader::SamplerEntry& sampler);
std::tuple<GLuint, BaseBindings> GetProgramHandle(GLenum primitive_mode,
BaseBindings base_bindings);
private:
/// Generates a geometry shader or returns one that already exists.
GLuint LazyGeometryProgram(OGLProgram& target_program, const std::string& glsl_topology,
u32 max_vertices, const std::string& debug_name);
void CalculateProperties();
VAddr addr;
std::size_t shader_length;
Maxwell::ShaderProgram program_type;
GLShader::ShaderSetup setup;
GLShader::ShaderEntries entries;
// Non-geometry program.
OGLProgram program;
// Geometry programs. These are needed because GLSL needs an input topology but it's not
// declared by the hardware. Workaround this issue by generating a different shader per input
// topology class.
struct {
std::string code;
OGLProgram points;
OGLProgram lines;
OGLProgram lines_adjacency;
OGLProgram triangles;
OGLProgram triangles_adjacency;
} geometry_programs;
struct GeometryPrograms {
CachedProgram points;
CachedProgram lines;
CachedProgram lines_adjacency;
CachedProgram triangles;
CachedProgram triangles_adjacency;
};
std::map<u32, GLuint> cbuf_resource_cache;
std::map<u32, GLuint> gmem_resource_cache;
std::map<u32, GLint> uniform_cache;
GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings);
/// Generates a geometry shader or returns one that already exists.
GLuint LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
GLenum primitive_mode);
CachedProgram TryLoadProgram(GLenum primitive_mode, BaseBindings base_bindings) const;
ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
VAddr addr{};
u64 unique_identifier{};
Maxwell::ShaderProgram program_type{};
ShaderDiskCacheOpenGL& disk_cache;
const PrecompiledPrograms& precompiled_programs;
std::size_t shader_length{};
GLShader::ShaderEntries entries;
std::string code;
std::unordered_map<BaseBindings, CachedProgram> programs;
std::unordered_map<BaseBindings, GeometryPrograms> geometry_programs;
std::unordered_map<u32, GLuint> cbuf_resource_cache;
std::unordered_map<u32, GLuint> gmem_resource_cache;
std::unordered_map<u32, GLint> uniform_cache;
};
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
public:
explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer);
explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system);
/// Loads disk cache for the current game
void LoadDiskCache(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback);
/// Gets the current specified shader stage program
Shader GetStageProgram(Maxwell::ShaderProgram program);
private:
std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders(
const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
const std::vector<ShaderDiskCacheRaw>& raws,
const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled);
CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
const std::set<GLenum>& supported_formats);
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
ShaderDiskCacheOpenGL disk_cache;
PrecompiledShaders precompiled_shaders;
PrecompiledPrograms precompiled_programs;
};
} // namespace OpenGL

View File

@@ -193,15 +193,14 @@ public:
ShaderEntries GetShaderEntries() const {
ShaderEntries entries;
for (const auto& cbuf : ir.GetConstantBuffers()) {
entries.const_buffers.emplace_back(cbuf.second, stage, GetConstBufferBlock(cbuf.first),
entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
cbuf.first);
}
for (const auto& sampler : ir.GetSamplers()) {
entries.samplers.emplace_back(sampler, stage, GetSampler(sampler));
entries.samplers.emplace_back(sampler);
}
for (const auto& gmem : ir.GetGlobalMemoryBases()) {
entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset, stage,
GetGlobalMemoryBlock(gmem));
entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset);
}
entries.clip_distances = ir.GetClipDistances();
entries.shader_length = ir.GetLength();
@@ -374,7 +373,8 @@ private:
void DeclareConstantBuffers() {
for (const auto& entry : ir.GetConstantBuffers()) {
const auto [index, size] = entry;
code.AddLine("layout (std140) uniform " + GetConstBufferBlock(index) + " {");
code.AddLine("layout (std140, binding = CBUF_BINDING_" + std::to_string(index) +
") uniform " + GetConstBufferBlock(index) + " {");
code.AddLine(" vec4 " + GetConstBuffer(index) + "[MAX_CONSTBUFFER_ELEMENTS];");
code.AddLine("};");
code.AddNewLine();
@@ -383,7 +383,10 @@ private:
void DeclareGlobalMemory() {
for (const auto& entry : ir.GetGlobalMemoryBases()) {
code.AddLine("layout (std430) buffer " + GetGlobalMemoryBlock(entry) + " {");
const std::string binding =
fmt::format("GMEM_BINDING_{}_{}", entry.cbuf_index, entry.cbuf_offset);
code.AddLine("layout (std430, binding = " + binding + ") buffer " +
GetGlobalMemoryBlock(entry) + " {");
code.AddLine(" float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];");
code.AddLine("};");
code.AddNewLine();
@@ -413,7 +416,8 @@ private:
if (sampler.IsShadow())
sampler_type += "Shadow";
code.AddLine("uniform " + sampler_type + ' ' + GetSampler(sampler) + ';');
code.AddLine("layout (binding = SAMPLER_BINDING_" + std::to_string(sampler.GetIndex()) +
") uniform " + sampler_type + ' ' + GetSampler(sampler) + ';');
}
if (!samplers.empty())
code.AddNewLine();
@@ -538,8 +542,9 @@ private:
if (const auto immediate = std::get_if<ImmediateNode>(offset)) {
// Direct access
const u32 offset_imm = immediate->GetValue();
return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), offset_imm / 4,
offset_imm % 4);
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
offset_imm / (4 * 4), (offset_imm / 4) % 4);
} else if (std::holds_alternative<OperationNode>(*offset)) {
// Indirect access

View File

@@ -5,6 +5,7 @@
#pragma once
#include <array>
#include <set>
#include <string>
#include <utility>
#include <vector>
@@ -18,64 +19,29 @@ class ShaderIR;
namespace OpenGL::GLShader {
struct ShaderEntries;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using ProgramResult = std::pair<std::string, ShaderEntries>;
using SamplerEntry = VideoCommon::Shader::Sampler;
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
public:
explicit ConstBufferEntry(const VideoCommon::Shader::ConstBuffer& entry,
Maxwell::ShaderStage stage, const std::string& name, u32 index)
: VideoCommon::Shader::ConstBuffer{entry}, stage{stage}, name{name}, index{index} {}
const std::string& GetName() const {
return name;
}
Maxwell::ShaderStage GetStage() const {
return stage;
}
explicit ConstBufferEntry(u32 max_offset, bool is_indirect, u32 index)
: VideoCommon::Shader::ConstBuffer{max_offset, is_indirect}, index{index} {}
u32 GetIndex() const {
return index;
}
u32 GetHash() const {
return (static_cast<u32>(stage) << 16) | index;
}
private:
std::string name;
Maxwell::ShaderStage stage{};
u32 index{};
};
class SamplerEntry : public VideoCommon::Shader::Sampler {
public:
explicit SamplerEntry(const VideoCommon::Shader::Sampler& entry, Maxwell::ShaderStage stage,
const std::string& name)
: VideoCommon::Shader::Sampler{entry}, stage{stage}, name{name} {}
const std::string& GetName() const {
return name;
}
Maxwell::ShaderStage GetStage() const {
return stage;
}
u32 GetHash() const {
return (static_cast<u32>(stage) << 16) | static_cast<u32>(GetIndex());
}
private:
std::string name;
Maxwell::ShaderStage stage{};
};
class GlobalMemoryEntry {
public:
explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, Maxwell::ShaderStage stage,
std::string name)
: cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, stage{stage}, name{std::move(name)} {}
explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset)
: cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {}
u32 GetCbufIndex() const {
return cbuf_index;
@@ -85,23 +51,9 @@ public:
return cbuf_offset;
}
const std::string& GetName() const {
return name;
}
Maxwell::ShaderStage GetStage() const {
return stage;
}
u32 GetHash() const {
return (static_cast<u32>(stage) << 24) | (cbuf_index << 16) | cbuf_offset;
}
private:
u32 cbuf_index{};
u32 cbuf_offset{};
Maxwell::ShaderStage stage{};
std::string name;
};
struct ShaderEntries {
@@ -112,8 +64,6 @@ struct ShaderEntries {
std::size_t shader_length{};
};
using ProgramResult = std::pair<std::string, ShaderEntries>;
std::string GetCommonDeclarations();
ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage,

View File

@@ -0,0 +1,656 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstring>
#include <fmt/format.h>
#include <lz4.h>
#include "common/assert.h"
#include "common/common_paths.h"
#include "common/common_types.h"
#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/scm_rev.h"
#include "core/core.h"
#include "core/hle/kernel/process.h"
#include "core/settings.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
namespace OpenGL {
using ShaderCacheVersionHash = std::array<u8, 64>;
enum class TransferableEntryKind : u32 {
Raw,
Usage,
};
enum class PrecompiledEntryKind : u32 {
Decompiled,
Dump,
};
constexpr u32 NativeVersion = 1;
// Making sure sizes doesn't change by accident
static_assert(sizeof(BaseBindings) == 12);
static_assert(sizeof(ShaderDiskCacheUsage) == 24);
namespace {
ShaderCacheVersionHash GetShaderCacheVersionHash() {
ShaderCacheVersionHash hash{};
const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size());
std::memcpy(hash.data(), Common::g_shader_cache_version, length);
return hash;
}
template <typename T>
std::vector<u8> CompressData(const T* source, std::size_t source_size) {
if (source_size > LZ4_MAX_INPUT_SIZE) {
// Source size exceeds LZ4 maximum input size
return {};
}
const auto source_size_int = static_cast<int>(source_size);
const int max_compressed_size = LZ4_compressBound(source_size_int);
std::vector<u8> compressed(max_compressed_size);
const int compressed_size = LZ4_compress_default(reinterpret_cast<const char*>(source),
reinterpret_cast<char*>(compressed.data()),
source_size_int, max_compressed_size);
if (compressed_size <= 0) {
// Compression failed
return {};
}
compressed.resize(compressed_size);
return compressed;
}
std::vector<u8> DecompressData(const std::vector<u8>& compressed, std::size_t uncompressed_size) {
std::vector<u8> uncompressed(uncompressed_size);
const int size_check = LZ4_decompress_safe(reinterpret_cast<const char*>(compressed.data()),
reinterpret_cast<char*>(uncompressed.data()),
static_cast<int>(compressed.size()),
static_cast<int>(uncompressed.size()));
if (static_cast<int>(uncompressed_size) != size_check) {
// Decompression failed
return {};
}
return uncompressed;
}
} // namespace
ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
u32 program_code_size, u32 program_code_size_b,
ProgramCode program_code, ProgramCode program_code_b)
: unique_identifier{unique_identifier}, program_type{program_type},
program_code_size{program_code_size}, program_code_size_b{program_code_size_b},
program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {}
ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default;
ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default;
bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) ||
file.ReadBytes(&program_type, sizeof(u32)) != sizeof(u32)) {
return false;
}
u32 program_code_size{};
u32 program_code_size_b{};
if (file.ReadBytes(&program_code_size, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&program_code_size_b, sizeof(u32)) != sizeof(u32)) {
return false;
}
program_code.resize(program_code_size);
program_code_b.resize(program_code_size_b);
if (file.ReadArray(program_code.data(), program_code_size) != program_code_size)
return false;
if (HasProgramA() &&
file.ReadArray(program_code_b.data(), program_code_size_b) != program_code_size_b) {
return false;
}
return true;
}
bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
if (file.WriteObject(unique_identifier) != 1 ||
file.WriteObject(static_cast<u32>(program_type)) != 1 ||
file.WriteObject(program_code_size) != 1 || file.WriteObject(program_code_size_b) != 1) {
return false;
}
if (file.WriteArray(program_code.data(), program_code_size) != program_code_size)
return false;
if (HasProgramA() &&
file.WriteArray(program_code_b.data(), program_code_size_b) != program_code_size_b) {
return false;
}
return true;
}
ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
ShaderDiskCacheOpenGL::LoadTransferable() {
// Skip games without title id
const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
if (!Settings::values.use_disk_shader_cache || !has_title_id)
return {};
tried_to_load = true;
FileUtil::IOFile file(GetTransferablePath(), "rb");
if (!file.IsOpen()) {
LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}",
GetTitleID());
return {};
}
u32 version{};
if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
LOG_ERROR(Render_OpenGL,
"Failed to get transferable cache version for title id={} - skipping",
GetTitleID());
return {};
}
if (version < NativeVersion) {
LOG_INFO(Render_OpenGL, "Transferable shader cache is old - removing");
file.Close();
InvalidateTransferable();
return {};
}
if (version > NativeVersion) {
LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
"of the emulator - skipping");
return {};
}
// Version is valid, load the shaders
std::vector<ShaderDiskCacheRaw> raws;
std::vector<ShaderDiskCacheUsage> usages;
while (file.Tell() < file.GetSize()) {
TransferableEntryKind kind{};
if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
LOG_ERROR(Render_OpenGL, "Failed to read transferable file - skipping");
return {};
}
switch (kind) {
case TransferableEntryKind::Raw: {
ShaderDiskCacheRaw entry;
if (!entry.Load(file)) {
LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry - skipping");
return {};
}
transferable.insert({entry.GetUniqueIdentifier(), {}});
raws.push_back(std::move(entry));
break;
}
case TransferableEntryKind::Usage: {
ShaderDiskCacheUsage usage{};
if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage)) {
LOG_ERROR(Render_OpenGL, "Failed to load transferable usage entry - skipping");
return {};
}
usages.push_back(std::move(usage));
break;
}
default:
LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={} - skipping",
static_cast<u32>(kind));
return {};
}
}
return {{raws, usages}};
}
std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
ShaderDiskCacheOpenGL::LoadPrecompiled() {
if (!IsUsable())
return {};
FileUtil::IOFile file(GetPrecompiledPath(), "rb");
if (!file.IsOpen()) {
LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}",
GetTitleID());
return {};
}
const auto result = LoadPrecompiledFile(file);
if (!result) {
LOG_INFO(Render_OpenGL,
"Failed to load precompiled cache for game with title id={} - removing",
GetTitleID());
file.Close();
InvalidatePrecompiled();
return {};
}
return *result;
}
std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
ShaderCacheVersionHash file_hash{};
if (file.ReadArray(file_hash.data(), file_hash.size()) != file_hash.size()) {
return {};
}
if (GetShaderCacheVersionHash() != file_hash) {
LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
return {};
}
std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> dumps;
while (file.Tell() < file.GetSize()) {
PrecompiledEntryKind kind{};
if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
return {};
}
switch (kind) {
case PrecompiledEntryKind::Decompiled: {
u64 unique_identifier{};
if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64))
return {};
const auto entry = LoadDecompiledEntry(file);
if (!entry)
return {};
decompiled.insert({unique_identifier, std::move(*entry)});
break;
}
case PrecompiledEntryKind::Dump: {
ShaderDiskCacheUsage usage;
if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage))
return {};
ShaderDiskCacheDump dump;
if (file.ReadBytes(&dump.binary_format, sizeof(u32)) != sizeof(u32))
return {};
u32 binary_length{};
u32 compressed_size{};
if (file.ReadBytes(&binary_length, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&compressed_size, sizeof(u32)) != sizeof(u32)) {
return {};
}
std::vector<u8> compressed_binary(compressed_size);
if (file.ReadArray(compressed_binary.data(), compressed_binary.size()) !=
compressed_binary.size()) {
return {};
}
dump.binary = DecompressData(compressed_binary, binary_length);
if (dump.binary.empty()) {
return {};
}
dumps.insert({usage, dump});
break;
}
default:
return {};
}
}
return {{decompiled, dumps}};
}
std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry(
FileUtil::IOFile& file) {
u32 code_size{};
u32 compressed_code_size{};
if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&compressed_code_size, sizeof(u32)) != sizeof(u32)) {
return {};
}
std::vector<u8> compressed_code(compressed_code_size);
if (file.ReadArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
return {};
}
const std::vector<u8> code = DecompressData(compressed_code, code_size);
if (code.empty()) {
return {};
}
ShaderDiskCacheDecompiled entry;
entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size);
u32 const_buffers_count{};
if (file.ReadBytes(&const_buffers_count, sizeof(u32)) != sizeof(u32))
return {};
for (u32 i = 0; i < const_buffers_count; ++i) {
u32 max_offset{};
u32 index{};
u8 is_indirect{};
if (file.ReadBytes(&max_offset, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&index, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&is_indirect, sizeof(u8)) != sizeof(u8)) {
return {};
}
entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index);
}
u32 samplers_count{};
if (file.ReadBytes(&samplers_count, sizeof(u32)) != sizeof(u32))
return {};
for (u32 i = 0; i < samplers_count; ++i) {
u64 offset{};
u64 index{};
u32 type{};
u8 is_array{};
u8 is_shadow{};
if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) ||
file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) ||
file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) ||
file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8)) {
return {};
}
entry.entries.samplers.emplace_back(
static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
static_cast<Tegra::Shader::TextureType>(type), is_array != 0, is_shadow != 0);
}
u32 global_memory_count{};
if (file.ReadBytes(&global_memory_count, sizeof(u32)) != sizeof(u32))
return {};
for (u32 i = 0; i < global_memory_count; ++i) {
u32 cbuf_index{};
u32 cbuf_offset{};
if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32)) {
return {};
}
entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset);
}
for (auto& clip_distance : entry.entries.clip_distances) {
u8 clip_distance_raw{};
if (file.ReadBytes(&clip_distance_raw, sizeof(u8)) != sizeof(u8))
return {};
clip_distance = clip_distance_raw != 0;
}
u64 shader_length{};
if (file.ReadBytes(&shader_length, sizeof(u64)) != sizeof(u64))
return {};
entry.entries.shader_length = static_cast<std::size_t>(shader_length);
return entry;
}
bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier,
const std::string& code,
const std::vector<u8>& compressed_code,
const GLShader::ShaderEntries& entries) {
if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Decompiled)) != 1 ||
file.WriteObject(unique_identifier) != 1 ||
file.WriteObject(static_cast<u32>(code.size())) != 1 ||
file.WriteObject(static_cast<u32>(compressed_code.size())) != 1 ||
file.WriteArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
return false;
}
if (file.WriteObject(static_cast<u32>(entries.const_buffers.size())) != 1)
return false;
for (const auto& cbuf : entries.const_buffers) {
if (file.WriteObject(static_cast<u32>(cbuf.GetMaxOffset())) != 1 ||
file.WriteObject(static_cast<u32>(cbuf.GetIndex())) != 1 ||
file.WriteObject(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0)) != 1) {
return false;
}
}
if (file.WriteObject(static_cast<u32>(entries.samplers.size())) != 1)
return false;
for (const auto& sampler : entries.samplers) {
if (file.WriteObject(static_cast<u64>(sampler.GetOffset())) != 1 ||
file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 ||
file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 ||
file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 ||
file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1) {
return false;
}
}
if (file.WriteObject(static_cast<u32>(entries.global_memory_entries.size())) != 1)
return false;
for (const auto& gmem : entries.global_memory_entries) {
if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 ||
file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1) {
return false;
}
}
for (const bool clip_distance : entries.clip_distances) {
if (file.WriteObject(static_cast<u8>(clip_distance ? 1 : 0)) != 1)
return false;
}
return file.WriteObject(static_cast<u64>(entries.shader_length)) == 1;
}
void ShaderDiskCacheOpenGL::InvalidateTransferable() const {
if (!FileUtil::Delete(GetTransferablePath())) {
LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
GetTransferablePath());
}
InvalidatePrecompiled();
}
void ShaderDiskCacheOpenGL::InvalidatePrecompiled() const {
if (!FileUtil::Delete(GetPrecompiledPath())) {
LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath());
}
}
void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
if (!IsUsable())
return;
const u64 id = entry.GetUniqueIdentifier();
if (transferable.find(id) != transferable.end()) {
// The shader already exists
return;
}
FileUtil::IOFile file = AppendTransferableFile();
if (!file.IsOpen())
return;
if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) {
LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry - removing");
file.Close();
InvalidateTransferable();
return;
}
transferable.insert({id, {}});
}
void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
if (!IsUsable())
return;
const auto it = transferable.find(usage.unique_identifier);
ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
auto& usages{it->second};
ASSERT(usages.find(usage) == usages.end());
usages.insert(usage);
FileUtil::IOFile file = AppendTransferableFile();
if (!file.IsOpen())
return;
if (file.WriteObject(TransferableEntryKind::Usage) != 1 || file.WriteObject(usage) != 1) {
LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry - removing");
file.Close();
InvalidateTransferable();
return;
}
}
void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::string& code,
const GLShader::ShaderEntries& entries) {
if (!IsUsable())
return;
const std::vector<u8> compressed_code{CompressData(code.data(), code.size())};
if (compressed_code.empty()) {
LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}",
unique_identifier);
return;
}
FileUtil::IOFile file = AppendPrecompiledFile();
if (!file.IsOpen())
return;
if (!SaveDecompiledFile(file, unique_identifier, code, compressed_code, entries)) {
LOG_ERROR(Render_OpenGL,
"Failed to save decompiled entry to the precompiled file - removing");
file.Close();
InvalidatePrecompiled();
}
}
void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) {
if (!IsUsable())
return;
GLint binary_length{};
glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
GLenum binary_format{};
std::vector<u8> binary(binary_length);
glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
const std::vector<u8> compressed_binary = CompressData(binary.data(), binary.size());
if (compressed_binary.empty()) {
LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}",
usage.unique_identifier);
return;
}
FileUtil::IOFile file = AppendPrecompiledFile();
if (!file.IsOpen())
return;
if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Dump)) != 1 ||
file.WriteObject(usage) != 1 || file.WriteObject(static_cast<u32>(binary_format)) != 1 ||
file.WriteObject(static_cast<u32>(binary_length)) != 1 ||
file.WriteObject(static_cast<u32>(compressed_binary.size())) != 1 ||
file.WriteArray(compressed_binary.data(), compressed_binary.size()) !=
compressed_binary.size()) {
LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing",
usage.unique_identifier);
file.Close();
InvalidatePrecompiled();
return;
}
}
bool ShaderDiskCacheOpenGL::IsUsable() const {
return tried_to_load && Settings::values.use_disk_shader_cache;
}
FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
if (!EnsureDirectories())
return {};
const auto transferable_path{GetTransferablePath()};
const bool existed = FileUtil::Exists(transferable_path);
FileUtil::IOFile file(transferable_path, "ab");
if (!file.IsOpen()) {
LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", transferable_path);
return {};
}
if (!existed || file.GetSize() == 0) {
// If the file didn't exist, write its version
if (file.WriteObject(NativeVersion) != 1) {
LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}",
transferable_path);
return {};
}
}
return file;
}
FileUtil::IOFile ShaderDiskCacheOpenGL::AppendPrecompiledFile() const {
if (!EnsureDirectories())
return {};
const auto precompiled_path{GetPrecompiledPath()};
const bool existed = FileUtil::Exists(precompiled_path);
FileUtil::IOFile file(precompiled_path, "ab");
if (!file.IsOpen()) {
LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path);
return {};
}
if (!existed || file.GetSize() == 0) {
const auto hash{GetShaderCacheVersionHash()};
if (file.WriteArray(hash.data(), hash.size()) != hash.size()) {
LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version hash in path={}",
precompiled_path);
return {};
}
}
return file;
}
bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
const auto CreateDir = [](const std::string& dir) {
if (!FileUtil::CreateDir(dir)) {
LOG_ERROR(Render_OpenGL, "Failed to create directory={}", dir);
return false;
}
return true;
};
return CreateDir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) &&
CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
CreateDir(GetPrecompiledDir());
}
std::string ShaderDiskCacheOpenGL::GetTransferablePath() const {
return FileUtil::SanitizePath(GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
}
std::string ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
return FileUtil::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
}
std::string ShaderDiskCacheOpenGL::GetTransferableDir() const {
return GetBaseDir() + DIR_SEP "transferable";
}
std::string ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
return GetBaseDir() + DIR_SEP "precompiled";
}
std::string ShaderDiskCacheOpenGL::GetBaseDir() const {
return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "opengl";
}
std::string ShaderDiskCacheOpenGL::GetTitleID() const {
return fmt::format("{:016X}", system.CurrentProcess()->GetTitleID());
}
} // namespace OpenGL

View File

@@ -0,0 +1,245 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <optional>
#include <string>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
namespace Core {
class System;
}
namespace FileUtil {
class IOFile;
}
namespace OpenGL {
using ProgramCode = std::vector<u64>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
/// Allocated bindings used by an OpenGL shader program
struct BaseBindings {
u32 cbuf{};
u32 gmem{};
u32 sampler{};
bool operator==(const BaseBindings& rhs) const {
return std::tie(cbuf, gmem, sampler) == std::tie(rhs.cbuf, rhs.gmem, rhs.sampler);
}
bool operator!=(const BaseBindings& rhs) const {
return !operator==(rhs);
}
};
/// Describes how a shader is used
struct ShaderDiskCacheUsage {
u64 unique_identifier{};
BaseBindings bindings;
GLenum primitive{};
bool operator==(const ShaderDiskCacheUsage& rhs) const {
return std::tie(unique_identifier, bindings, primitive) ==
std::tie(rhs.unique_identifier, rhs.bindings, rhs.primitive);
}
bool operator!=(const ShaderDiskCacheUsage& rhs) const {
return !operator==(rhs);
}
};
} // namespace OpenGL
namespace std {
template <>
struct hash<OpenGL::BaseBindings> {
std::size_t operator()(const OpenGL::BaseBindings& bindings) const {
return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16;
}
};
template <>
struct hash<OpenGL::ShaderDiskCacheUsage> {
std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const {
return static_cast<std::size_t>(usage.unique_identifier) ^
std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16;
}
};
} // namespace std
namespace OpenGL {
/// Describes a shader how it's used by the guest GPU
class ShaderDiskCacheRaw {
public:
explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
u32 program_code_size, u32 program_code_size_b,
ProgramCode program_code, ProgramCode program_code_b);
ShaderDiskCacheRaw();
~ShaderDiskCacheRaw();
bool Load(FileUtil::IOFile& file);
bool Save(FileUtil::IOFile& file) const;
u64 GetUniqueIdentifier() const {
return unique_identifier;
}
bool HasProgramA() const {
return program_type == Maxwell::ShaderProgram::VertexA;
}
Maxwell::ShaderProgram GetProgramType() const {
return program_type;
}
Maxwell::ShaderStage GetProgramStage() const {
switch (program_type) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB:
return Maxwell::ShaderStage::Vertex;
case Maxwell::ShaderProgram::TesselationControl:
return Maxwell::ShaderStage::TesselationControl;
case Maxwell::ShaderProgram::TesselationEval:
return Maxwell::ShaderStage::TesselationEval;
case Maxwell::ShaderProgram::Geometry:
return Maxwell::ShaderStage::Geometry;
case Maxwell::ShaderProgram::Fragment:
return Maxwell::ShaderStage::Fragment;
}
UNREACHABLE();
}
const ProgramCode& GetProgramCode() const {
return program_code;
}
const ProgramCode& GetProgramCodeB() const {
return program_code_b;
}
private:
u64 unique_identifier{};
Maxwell::ShaderProgram program_type{};
u32 program_code_size{};
u32 program_code_size_b{};
ProgramCode program_code;
ProgramCode program_code_b;
};
/// Contains decompiled data from a shader
struct ShaderDiskCacheDecompiled {
std::string code;
GLShader::ShaderEntries entries;
};
/// Contains an OpenGL dumped binary program
struct ShaderDiskCacheDump {
GLenum binary_format;
std::vector<u8> binary;
};
class ShaderDiskCacheOpenGL {
public:
explicit ShaderDiskCacheOpenGL(Core::System& system);
/// Loads transferable cache. If file has a old version or on failure, it deletes the file.
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
LoadTransferable();
/// Loads current game's precompiled cache. Invalidates on failure.
std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
LoadPrecompiled();
/// Removes the transferable (and precompiled) cache file.
void InvalidateTransferable() const;
/// Removes the precompiled cache file.
void InvalidatePrecompiled() const;
/// Saves a raw dump to the transferable file. Checks for collisions.
void SaveRaw(const ShaderDiskCacheRaw& entry);
/// Saves shader usage to the transferable file. Does not check for collisions.
void SaveUsage(const ShaderDiskCacheUsage& usage);
/// Saves a decompiled entry to the precompiled file. Does not check for collisions.
void SaveDecompiled(u64 unique_identifier, const std::string& code,
const GLShader::ShaderEntries& entries);
/// Saves a dump entry to the precompiled file. Does not check for collisions.
void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);
private:
/// Loads the transferable cache. Returns empty on failure.
std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
LoadPrecompiledFile(FileUtil::IOFile& file);
/// Loads a decompiled cache entry from the passed file. Returns empty on failure.
std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry(FileUtil::IOFile& file);
/// Saves a decompiled entry to the passed file. Returns true on success.
bool SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier, const std::string& code,
const std::vector<u8>& compressed_code,
const GLShader::ShaderEntries& entries);
/// Returns if the cache can be used
bool IsUsable() const;
/// Opens current game's transferable file and write it's header if it doesn't exist
FileUtil::IOFile AppendTransferableFile() const;
/// Opens current game's precompiled file and write it's header if it doesn't exist
FileUtil::IOFile AppendPrecompiledFile() const;
/// Create shader disk cache directories. Returns true on success.
bool EnsureDirectories() const;
/// Gets current game's transferable file path
std::string GetTransferablePath() const;
/// Gets current game's precompiled file path
std::string GetPrecompiledPath() const;
/// Get user's transferable directory path
std::string GetTransferableDir() const;
/// Get user's precompiled directory path
std::string GetPrecompiledDir() const;
/// Get user's shader directory path
std::string GetBaseDir() const;
/// Get current game's title id
std::string GetTitleID() const;
// Copre system
Core::System& system;
// Stored transferable shaders
std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
// The cache has been loaded at boot
bool tried_to_load{};
};
} // namespace OpenGL

View File

@@ -20,15 +20,14 @@ static constexpr u32 PROGRAM_OFFSET{10};
ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "#version 430 core\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += "// Shader Unique Id: VS" + id + "\n\n";
out += GetCommonDeclarations();
out += R"(
layout (location = 0) out vec4 position;
layout(std140) uniform vs_config {
layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
uvec4 alpha_test;
@@ -78,7 +77,6 @@ void main() {
}
ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
// Version is intentionally skipped in shader generation, it's added by the lazy compilation.
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
@@ -89,7 +87,7 @@ ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
layout (location = 0) in vec4 gs_position[];
layout (location = 0) out vec4 position;
layout (std140) uniform gs_config {
layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
uvec4 alpha_test;
@@ -112,8 +110,7 @@ void main() {
ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "#version 430 core\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += "// Shader Unique Id: FS" + id + "\n\n";
out += GetCommonDeclarations();
@@ -129,7 +126,7 @@ layout (location = 7) out vec4 FragColor7;
layout (location = 0) in vec4 position;
layout (std140) uniform fs_config {
layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
uvec4 alpha_test;

View File

@@ -26,12 +26,10 @@ struct ShaderSetup {
ProgramCode code;
ProgramCode code_b; // Used for dual vertex shaders
u64 unique_identifier;
std::size_t real_size;
std::size_t real_size_b;
} program;
/// Used in scenarios where we have a dual vertex shaders
void SetProgramB(ProgramCode&& program_b) {
void SetProgramB(ProgramCode program_b) {
program.code_b = std::move(program_b);
has_program_b = true;
}

View File

@@ -47,7 +47,7 @@ GLuint LoadShader(const char* source, GLenum type);
* @returns Handle of the newly created OpenGL program object
*/
template <typename... T>
GLuint LoadProgram(bool separable_program, T... shaders) {
GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) {
// Link the program
LOG_DEBUG(Render_OpenGL, "Linking program...");
@@ -58,6 +58,9 @@ GLuint LoadProgram(bool separable_program, T... shaders) {
if (separable_program) {
glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
}
if (hint_retrievable) {
glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
}
glLinkProgram(program_id);

View File

@@ -462,29 +462,35 @@ void OpenGLState::ApplyPolygonOffset() const {
}
void OpenGLState::ApplyTextures() const {
bool has_delta{};
std::size_t first{};
std::size_t last{};
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures;
for (std::size_t i = 0; i < std::size(texture_units); ++i) {
const auto& texture_unit = texture_units[i];
const auto& cur_state_texture_unit = cur_state.texture_units[i];
textures[i] = texture_unit.texture;
if (texture_unit.texture != cur_state_texture_unit.texture) {
glActiveTexture(TextureUnits::MaxwellTexture(static_cast<int>(i)).Enum());
glBindTexture(texture_unit.target, texture_unit.texture);
}
// Update the texture swizzle
if (texture_unit.swizzle.r != cur_state_texture_unit.swizzle.r ||
texture_unit.swizzle.g != cur_state_texture_unit.swizzle.g ||
texture_unit.swizzle.b != cur_state_texture_unit.swizzle.b ||
texture_unit.swizzle.a != cur_state_texture_unit.swizzle.a) {
std::array<GLint, 4> mask = {texture_unit.swizzle.r, texture_unit.swizzle.g,
texture_unit.swizzle.b, texture_unit.swizzle.a};
glTexParameteriv(texture_unit.target, GL_TEXTURE_SWIZZLE_RGBA, mask.data());
if (textures[i] != cur_state_texture_unit.texture) {
if (!has_delta) {
first = i;
has_delta = true;
}
last = i;
}
}
if (has_delta) {
glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
textures.data());
}
}
void OpenGLState::ApplySamplers() const {
bool has_delta{};
std::size_t first{}, last{};
std::size_t first{};
std::size_t last{};
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers;
for (std::size_t i = 0; i < std::size(samplers); ++i) {
samplers[i] = texture_units[i].sampler;

View File

@@ -126,26 +126,14 @@ public:
struct TextureUnit {
GLuint texture; // GL_TEXTURE_BINDING_2D
GLuint sampler; // GL_SAMPLER_BINDING
GLenum target;
struct {
GLint r; // GL_TEXTURE_SWIZZLE_R
GLint g; // GL_TEXTURE_SWIZZLE_G
GLint b; // GL_TEXTURE_SWIZZLE_B
GLint a; // GL_TEXTURE_SWIZZLE_A
} swizzle;
void Unbind() {
texture = 0;
swizzle.r = GL_RED;
swizzle.g = GL_GREEN;
swizzle.b = GL_BLUE;
swizzle.a = GL_ALPHA;
}
void Reset() {
Unbind();
sampler = 0;
target = GL_TEXTURE_2D;
}
};
std::array<TextureUnit, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_units;

View File

@@ -98,8 +98,8 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons
return matrix;
}
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window)
: VideoCore::RendererBase{window} {}
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system)
: VideoCore::RendererBase{window}, system{system} {}
RendererOpenGL::~RendererOpenGL() = default;
@@ -171,10 +171,6 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
Memory::GetPointer(framebuffer_addr),
gl_framebuffer_data.data(), true);
state.texture_units[0].texture = screen_info.texture.resource.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
// Update existing texture
@@ -182,14 +178,11 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
// they differ from the LCD resolution.
// TODO: Applications could theoretically crash yuzu here by specifying too large
// framebuffer sizes. We should make sure that this cannot happen.
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
screen_info.texture.gl_format, screen_info.texture.gl_type,
gl_framebuffer_data.data());
glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width,
framebuffer.height, screen_info.texture.gl_format,
screen_info.texture.gl_type, gl_framebuffer_data.data());
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
state.texture_units[0].texture = 0;
state.Apply();
}
}
@@ -199,17 +192,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
*/
void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture) {
state.texture_units[0].texture = texture.resource.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
// Update existing texture
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
state.texture_units[0].texture = 0;
state.Apply();
const u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
glClearTexImage(texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
}
/**
@@ -249,26 +233,13 @@ void RendererOpenGL::InitOpenGLObjects() {
sizeof(ScreenRectVertex));
// Allocate textures for the screen
screen_info.texture.resource.Create();
screen_info.texture.resource.Create(GL_TEXTURE_2D);
// Allocation of storage is deferred until the first frame, when we
// know the framebuffer size.
state.texture_units[0].texture = screen_info.texture.resource.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
const GLuint texture = screen_info.texture.resource.handle;
glTextureStorage2D(texture, 1, GL_RGBA8, 1, 1);
screen_info.display_texture = screen_info.texture.resource.handle;
state.texture_units[0].texture = 0;
state.Apply();
// Clear screen to black
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
}
@@ -279,25 +250,24 @@ void RendererOpenGL::CreateRasterizer() {
}
// Initialize sRGB Usage
OpenGLState::ClearsRGBUsed();
rasterizer = std::make_unique<RasterizerOpenGL>(render_window, screen_info);
rasterizer = std::make_unique<RasterizerOpenGL>(render_window, system, screen_info);
}
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
const Tegra::FramebufferConfig& framebuffer) {
texture.width = framebuffer.width;
texture.height = framebuffer.height;
GLint internal_format;
switch (framebuffer.pixel_format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8:
internal_format = GL_RGBA;
internal_format = GL_RGBA8;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
gl_framebuffer_data.resize(texture.width * texture.height * 4);
break;
default:
internal_format = GL_RGBA;
internal_format = GL_RGBA8;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
gl_framebuffer_data.resize(texture.width * texture.height * 4);
@@ -306,15 +276,9 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
UNREACHABLE();
}
state.texture_units[0].texture = texture.resource.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
texture.gl_format, texture.gl_type, nullptr);
state.texture_units[0].texture = 0;
state.Apply();
texture.resource.Release();
texture.resource.Create(GL_TEXTURE_2D);
glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height);
}
void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w,
@@ -356,7 +320,6 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
}};
state.texture_units[0].texture = screen_info.display_texture;
state.texture_units[0].swizzle = {GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
// Workaround brigthness problems in SMO by enabling sRGB in the final output
// if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987
state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();

View File

@@ -12,6 +12,10 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
namespace Core {
class System;
}
namespace Core::Frontend {
class EmuWindow;
}
@@ -41,7 +45,7 @@ struct ScreenInfo {
class RendererOpenGL : public VideoCore::RendererBase {
public:
explicit RendererOpenGL(Core::Frontend::EmuWindow& window);
explicit RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system);
~RendererOpenGL() override;
/// Swap buffers (render frame)
@@ -72,6 +76,8 @@ private:
void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture);
Core::System& system;
OpenGLState state;
// OpenGL object IDs

View File

@@ -25,7 +25,7 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) {
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();

View File

@@ -35,7 +35,7 @@ u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 p
switch (opcode->get().GetId()) {
case OpCode::Id::HADD2_C:
case OpCode::Id::HMUL2_C:
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
case OpCode::Id::HADD2_R:
case OpCode::Id::HMUL2_R:
return GetRegister(instr.gpr20);

View File

@@ -26,7 +26,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u3
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();

View File

@@ -48,7 +48,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();
const bool input_signed = instr.conversion.is_input_signed;
@@ -72,7 +72,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();
@@ -110,7 +110,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();

View File

@@ -27,14 +27,14 @@ u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) {
auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::FFMA_CR: {
return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
GetRegister(instr.gpr39)};
}
case OpCode::Id::FFMA_RR:
return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
case OpCode::Id::FFMA_RC: {
return {GetRegister(instr.gpr39),
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
}
case OpCode::Id::FFMA_IMM:
return {GetImmediate19(instr), GetRegister(instr.gpr39)};

View File

@@ -25,7 +25,7 @@ u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();

View File

@@ -25,7 +25,7 @@ u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u3
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();
op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false);

View File

@@ -39,13 +39,14 @@ u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) {
neg_b = instr.hfma2.negate_b;
neg_c = instr.hfma2.negate_c;
return {instr.hfma2.saturate, instr.hfma2.type_b,
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), instr.hfma2.type_reg39,
GetRegister(instr.gpr39)};
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
case OpCode::Id::HFMA2_RC:
neg_b = instr.hfma2.negate_b;
neg_c = instr.hfma2.negate_c;
return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
instr.hfma2.type_b, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
instr.hfma2.type_b,
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
case OpCode::Id::HFMA2_RR:
neg_b = instr.hfma2.rr.negate_b;
neg_c = instr.hfma2.rr.negate_c;

View File

@@ -23,7 +23,7 @@ u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();

View File

@@ -25,7 +25,7 @@ u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code,
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();

View File

@@ -80,7 +80,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
Node index = GetRegister(instr.gpr8);
const Node op_a =
GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, index);
GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
switch (instr.ld_c.type.Value()) {
case Tegra::Shader::UniformType::Single:
@@ -89,7 +89,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
case Tegra::Shader::UniformType::Double: {
const Node op_b =
GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, index);
GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);
SetTemporal(bb, 0, op_a);
SetTemporal(bb, 1, op_b);
@@ -104,19 +104,42 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
}
case OpCode::Id::LD_L: {
UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}",
static_cast<unsigned>(instr.ld_l.unknown.Value()));
static_cast<u32>(instr.ld_l.unknown.Value()));
const Node index = Operation(OperationCode::IAdd, GetRegister(instr.gpr8),
Immediate(static_cast<s32>(instr.smem_imm)));
const Node lmem = GetLocalMemory(index);
const auto GetLmem = [&](s32 offset) {
ASSERT(offset % 4 == 0);
const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
immediate_offset);
return GetLocalMemory(address);
};
switch (instr.ldst_sl.type.Value()) {
case Tegra::Shader::StoreType::Bytes32:
SetRegister(bb, instr.gpr0, lmem);
case Tegra::Shader::StoreType::Bits32:
case Tegra::Shader::StoreType::Bits64:
case Tegra::Shader::StoreType::Bits128: {
const u32 count = [&]() {
switch (instr.ldst_sl.type.Value()) {
case Tegra::Shader::StoreType::Bits32:
return 1;
case Tegra::Shader::StoreType::Bits64:
return 2;
case Tegra::Shader::StoreType::Bits128:
return 4;
default:
UNREACHABLE();
return 0;
}
}();
for (u32 i = 0; i < count; ++i)
SetTemporal(bb, i, GetLmem(i * 4));
for (u32 i = 0; i < count; ++i)
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
break;
}
default:
UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
static_cast<unsigned>(instr.ldst_sl.type.Value()));
static_cast<u32>(instr.ldst_sl.type.Value()));
}
break;
}
@@ -142,7 +165,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
ASSERT(cbuf != nullptr);
const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
ASSERT(cbuf_offset_imm != nullptr);
const auto cbuf_offset = cbuf_offset_imm->GetValue() * 4;
const auto cbuf_offset = cbuf_offset_imm->GetValue();
bb.push_back(Comment(
fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
@@ -202,12 +225,20 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
static_cast<u32>(instr.st_l.unknown.Value()));
const Node index = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
Immediate(static_cast<s32>(instr.smem_imm)));
const auto GetLmemAddr = [&](s32 offset) {
ASSERT(offset % 4 == 0);
const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset);
return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
};
switch (instr.ldst_sl.type.Value()) {
case Tegra::Shader::StoreType::Bytes32:
SetLocalMemory(bb, index, GetRegister(instr.gpr0));
case Tegra::Shader::StoreType::Bits128:
SetLocalMemory(bb, GetLmemAddr(12), GetRegister(instr.gpr0.Value() + 3));
SetLocalMemory(bb, GetLmemAddr(8), GetRegister(instr.gpr0.Value() + 2));
case Tegra::Shader::StoreType::Bits64:
SetLocalMemory(bb, GetLmemAddr(4), GetRegister(instr.gpr0.Value() + 1));
case Tegra::Shader::StoreType::Bits32:
SetLocalMemory(bb, GetLmemAddr(0), GetRegister(instr.gpr0));
break;
default:
UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",
@@ -324,15 +355,18 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const auto& sampler =
GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
u32 indexer = 0;
switch (instr.txq.query_type) {
case Tegra::Shader::TextureQueryType::Dimension: {
for (u32 element = 0; element < 4; ++element) {
MetaTexture meta{sampler, element};
const Node value = Operation(OperationCode::F4TextureQueryDimensions,
std::move(meta), GetRegister(instr.gpr8));
SetTemporal(bb, element, value);
if (instr.txq.IsComponentEnabled(element)) {
MetaTexture meta{sampler, element};
const Node value = Operation(OperationCode::F4TextureQueryDimensions,
std::move(meta), GetRegister(instr.gpr8));
SetTemporal(bb, indexer++, value);
}
}
for (u32 i = 0; i < 4; ++i) {
for (u32 i = 0; i < indexer; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
}
break;
@@ -734,4 +768,4 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
return {coord_count, total_coord_count};
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -23,7 +23,7 @@ u32 ShaderIR::DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc) {
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();

View File

@@ -32,13 +32,14 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) {
auto [is_merge, op_b, op_c] = [&]() -> std::tuple<bool, Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::XMAD_CR:
return {instr.xmad.merge_56, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
return {instr.xmad.merge_56,
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
GetRegister(instr.gpr39)};
case OpCode::Id::XMAD_RR:
return {instr.xmad.merge_37, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
case OpCode::Id::XMAD_RC:
return {false, GetRegister(instr.gpr39),
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
case OpCode::Id::XMAD_IMM:
return {instr.xmad.merge_37, Immediate(static_cast<u32>(instr.xmad.imm20_16)),
GetRegister(instr.gpr39)};

View File

@@ -236,6 +236,11 @@ private:
class ConstBuffer {
public:
explicit ConstBuffer(u32 max_offset, bool is_indirect)
: max_offset{max_offset}, is_indirect{is_indirect} {}
ConstBuffer() = default;
void MarkAsUsed(u64 offset) {
max_offset = std::max(max_offset, static_cast<u32>(offset));
}
@@ -249,7 +254,11 @@ public:
}
u32 GetSize() const {
return max_offset + 1;
return max_offset + sizeof(float);
}
u32 GetMaxOffset() const {
return max_offset;
}
private:

View File

@@ -50,6 +50,24 @@ bool SurfaceTargetIsLayered(SurfaceTarget target) {
}
}
bool SurfaceTargetIsArray(SurfaceTarget target) {
switch (target) {
case SurfaceTarget::Texture1D:
case SurfaceTarget::Texture2D:
case SurfaceTarget::Texture3D:
case SurfaceTarget::TextureCubemap:
return false;
case SurfaceTarget::Texture1DArray:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
return true;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
UNREACHABLE();
return false;
}
}
PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
switch (format) {
case Tegra::DepthFormat::S8_Z24_UNORM:

View File

@@ -109,8 +109,7 @@ enum class SurfaceType {
ColorTexture = 0,
Depth = 1,
DepthStencil = 2,
Fill = 3,
Invalid = 4,
Invalid = 3,
};
enum class SurfaceTarget {
@@ -441,6 +440,8 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t
bool SurfaceTargetIsLayered(SurfaceTarget target);
bool SurfaceTargetIsArray(SurfaceTarget target);
PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format);
PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format);

View File

@@ -182,7 +182,7 @@ struct TICEntry {
};
union {
BitField<0, 16, u32> height_minus_1;
BitField<16, 15, u32> depth_minus_1;
BitField<16, 14, u32> depth_minus_1;
};
union {
BitField<6, 13, u32> mip_lod_bias;
@@ -317,7 +317,6 @@ struct FullTextureInfo {
u32 index;
TICEntry tic;
TSCEntry tsc;
bool enabled;
};
/// Returns the number of bytes per pixel of the input texture format.

View File

@@ -11,8 +11,9 @@
namespace VideoCore {
std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window) {
return std::make_unique<OpenGL::RendererOpenGL>(emu_window);
std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
Core::System& system) {
return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system);
}
u16 GetResolutionScaleFactor(const RendererBase& renderer) {

View File

@@ -6,6 +6,10 @@
#include <memory>
namespace Core {
class System;
}
namespace Core::Frontend {
class EmuWindow;
}
@@ -20,7 +24,8 @@ class RendererBase;
* @note The returned renderer instance is simply allocated. Its Init()
* function still needs to be called to fully complete its setup.
*/
std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window);
std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
Core::System& system);
u16 GetResolutionScaleFactor(const RendererBase& renderer);

View File

@@ -29,6 +29,15 @@ void EmuThread::run() {
stop_run = false;
emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources(
stop_run, [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) {
emit LoadProgress(stage, value, total);
});
emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
// holds whether the cpu was running during the last iteration,
// so that the DebugModeLeft signal can be emitted before the
// next execution step

View File

@@ -22,6 +22,10 @@ class GGLWidgetInternal;
class GMainWindow;
class GRenderWindow;
namespace VideoCore {
enum class LoadCallbackStage;
}
class EmuThread : public QThread {
Q_OBJECT
@@ -75,7 +79,7 @@ public:
private:
bool exec_step = false;
bool running = false;
std::atomic<bool> stop_run{false};
std::atomic_bool stop_run{false};
std::mutex running_mutex;
std::condition_variable running_cv;
@@ -101,6 +105,8 @@ signals:
void DebugModeLeft();
void ErrorThrown(Core::System::ResultStatus, std::string);
void LoadProgress(VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total);
};
class GRenderWindow : public QWidget, public Core::Frontend::EmuWindow {

View File

@@ -370,6 +370,8 @@ void Config::ReadValues() {
Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat();
Settings::values.use_frame_limit = qt_config->value("use_frame_limit", true).toBool();
Settings::values.frame_limit = qt_config->value("frame_limit", 100).toInt();
Settings::values.use_disk_shader_cache =
qt_config->value("use_disk_shader_cache", false).toBool();
Settings::values.use_accurate_gpu_emulation =
qt_config->value("use_accurate_gpu_emulation", false).toBool();
@@ -629,6 +631,7 @@ void Config::SaveValues() {
qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor);
qt_config->setValue("use_frame_limit", Settings::values.use_frame_limit);
qt_config->setValue("frame_limit", Settings::values.frame_limit);
qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache);
qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation);
// Cast to double because Qt's written float values are not human-readable

View File

@@ -62,9 +62,7 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent)
const QColor new_bg_color = QColorDialog::getColor(bg_color);
if (!new_bg_color.isValid())
return;
bg_color = new_bg_color;
ui->bg_button->setStyleSheet(
QString("QPushButton { background-color: %1 }").arg(bg_color.name()));
UpdateBackgroundColorButton(new_bg_color);
});
}
@@ -75,11 +73,10 @@ void ConfigureGraphics::setConfiguration() {
static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit);
ui->frame_limit->setValue(Settings::values.frame_limit);
ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
bg_color = QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
Settings::values.bg_blue);
ui->bg_button->setStyleSheet(
QString("QPushButton { background-color: %1 }").arg(bg_color.name()));
UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
Settings::values.bg_blue));
}
void ConfigureGraphics::applyConfiguration() {
@@ -87,8 +84,19 @@ void ConfigureGraphics::applyConfiguration() {
ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked();
Settings::values.frame_limit = ui->frame_limit->value();
Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
Settings::values.bg_red = static_cast<float>(bg_color.redF());
Settings::values.bg_green = static_cast<float>(bg_color.greenF());
Settings::values.bg_blue = static_cast<float>(bg_color.blueF());
}
void ConfigureGraphics::UpdateBackgroundColorButton(QColor color) {
bg_color = color;
QPixmap pixmap(ui->bg_button->size());
pixmap.fill(bg_color);
const QIcon color_icon(pixmap);
ui->bg_button->setIcon(color_icon);
}

View File

@@ -23,6 +23,8 @@ public:
private:
void setConfiguration();
void UpdateBackgroundColorButton(QColor color);
std::unique_ptr<Ui::ConfigureGraphics> ui;
QColor bg_color;
};

View File

@@ -49,6 +49,13 @@
</item>
</layout>
</item>
<item>
<widget class="QCheckBox" name="use_disk_shader_cache">
<property name="text">
<string>Use disk shader cache</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="use_accurate_gpu_emulation">
<property name="text">

View File

@@ -13,7 +13,6 @@
#include "core/hle/kernel/readable_event.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/kernel/timer.h"
#include "core/hle/kernel/wait_object.h"
#include "core/memory.h"
@@ -155,8 +154,6 @@ std::unique_ptr<WaitTreeWaitObject> WaitTreeWaitObject::make(const Kernel::WaitO
switch (object.GetHandleType()) {
case Kernel::HandleType::ReadableEvent:
return std::make_unique<WaitTreeEvent>(static_cast<const Kernel::ReadableEvent&>(object));
case Kernel::HandleType::Timer:
return std::make_unique<WaitTreeTimer>(static_cast<const Kernel::Timer&>(object));
case Kernel::HandleType::Thread:
return std::make_unique<WaitTreeThread>(static_cast<const Kernel::Thread&>(object));
default:
@@ -348,23 +345,6 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeEvent::GetChildren() const {
return list;
}
WaitTreeTimer::WaitTreeTimer(const Kernel::Timer& object) : WaitTreeWaitObject(object) {}
WaitTreeTimer::~WaitTreeTimer() = default;
std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeTimer::GetChildren() const {
std::vector<std::unique_ptr<WaitTreeItem>> list(WaitTreeWaitObject::GetChildren());
const auto& timer = static_cast<const Kernel::Timer&>(object);
list.push_back(std::make_unique<WaitTreeText>(
tr("reset type = %1").arg(GetResetTypeQString(timer.GetResetType()))));
list.push_back(
std::make_unique<WaitTreeText>(tr("initial delay = %1").arg(timer.GetInitialDelay())));
list.push_back(
std::make_unique<WaitTreeText>(tr("interval delay = %1").arg(timer.GetIntervalDelay())));
return list;
}
WaitTreeThreadList::WaitTreeThreadList(const std::vector<Kernel::SharedPtr<Kernel::Thread>>& list)
: thread_list(list) {}
WaitTreeThreadList::~WaitTreeThreadList() = default;

View File

@@ -20,7 +20,6 @@ namespace Kernel {
class ReadableEvent;
class WaitObject;
class Thread;
class Timer;
} // namespace Kernel
class WaitTreeThread;
@@ -150,15 +149,6 @@ public:
std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override;
};
class WaitTreeTimer : public WaitTreeWaitObject {
Q_OBJECT
public:
explicit WaitTreeTimer(const Kernel::Timer& object);
~WaitTreeTimer() override;
std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override;
};
class WaitTreeThreadList : public WaitTreeExpandableItem {
Q_OBJECT
public:

View File

@@ -43,6 +43,7 @@ QProgressBar {
}
QProgressBar::chunk {
background-color: #0ab9e6;
width: 1px;
})";
constexpr const char PROGRESSBAR_STYLE_BUILD[] = R"(
@@ -53,7 +54,8 @@ QProgressBar {
padding: 2px;
}
QProgressBar::chunk {
background-color: #ff3c28;
background-color: #ff3c28;
width: 1px;
})";
constexpr const char PROGRESSBAR_STYLE_COMPLETE[] = R"(

Some files were not shown because too many files have changed in this diff Show More