Compare commits

..

122 Commits

Author SHA1 Message Date
Alex James
a5dbda3f76 travis/macos: Use macpack to bundle dependencies
This appears to properly handle the ffmpeg libraries that dylibbundler
failed to patch.
2019-03-23 01:37:38 +01:00
MerryMage
2bcebcff2a travis: Simplify macos/upload.sh 2019-03-23 01:33:53 +01:00
bunnei
819dd93257 Merge pull request #2279 from lioncash/cheat-global
file_sys/cheat_engine: Remove use of global system accessors
2019-03-22 18:41:44 -04:00
bunnei
e5893db3e6 Merge pull request #2256 from bunnei/gpu-vmm
gpu: Rewrite MemoryManager based on the VMManager implementation.
2019-03-22 18:41:12 -04:00
bunnei
a7157fe27d Merge pull request #2277 from bunnei/fix-smo-transitions
Revert "Devirtualize Register/Unregister and use a wrapper instead."
2019-03-22 18:40:53 -04:00
Lioncash
733cf179b8 file_sys/cheat_engine: Silence truncation and sign-conversion warnings 2019-03-22 14:43:41 -04:00
Lioncash
540235bb05 file_sys/cheat_engine: Remove use of global system accessors
Instead, pass in the core timing instance and make the dependency
explicit in the interface.
2019-03-22 14:43:37 -04:00
bunnei
7b6d516faa Merge pull request #2234 from lioncash/mutex
core/hle/kernel: Make Mutex a per-process class.
2019-03-21 22:18:36 -04:00
bunnei
b78e7b3454 Merge pull request #2274 from lioncash/include
core/memory: Remove unnecessary includes
2019-03-21 22:14:27 -04:00
bunnei
d0dddb3e9d Revert "Devirtualize Register/Unregister and use a wrapper instead."
- Fixes graphical issues from transitions in Super Mario Odyssey.
2019-03-21 21:56:56 -04:00
bunnei
4d95adcac5 Merge pull request #2275 from lioncash/memflags
kernel/vm_manager: Amend flag value for code data
2019-03-21 21:43:15 -04:00
bunnei
e703772c83 Merge pull request #2276 from lioncash/am
service/am: Add function table for IDebugFunctions
2019-03-21 21:42:17 -04:00
bunnei
639f0c524d Merge pull request #1933 from DarkLordZach/cheat-engine
file_sys: Implement parser and interpreter for game memory cheats
2019-03-21 21:41:59 -04:00
Lioncash
76f27d1f44 service/am: Add function table for IDebugFunctions
We already have the service related stuff set up for this, however, it's
missing the function table.
2019-03-21 15:58:03 -04:00
Lioncash
18918f5f2f kernel/vm_manager: Rename CodeStatic/CodeMutable to Code and CodeData respectively
Makes it more evident that one is for actual code and one is for actual
data. Mutable and static are less than ideal terms here, because
read-only data is technically not mutable, but we were mapping it with
that label.
2019-03-21 11:43:35 -04:00
Lioncash
56c80a2a21 kernel/vm_manager: Amend flag values for CodeMutable
This should actually be using the data flags, rather than the code
flags.
2019-03-21 11:23:14 -04:00
Lioncash
c221308a66 core/memory: Remove unnecessary includes
In 93da8e0abf, the page table construct
was moved to the common library (which utilized these inclusions). Since
the move, nothing requires these headers to be included within the
memory header.
2019-03-21 09:48:54 -04:00
bunnei
839c0f829b Merge pull request #2260 from lioncash/sdl
input_common/sdl: Correct return values within GetPollers implementations
2019-03-21 00:20:49 -04:00
Lioncash
109b78a6d6 common/bit_util: Fix bad merge duplicating the copy constructor
Introduced as a result of #2090, we already define the copy constructor
further down below, so this isn't needed.
2019-03-20 23:48:37 -04:00
bunnei
3e930304fe Merge pull request #2090 from FearlessTobi/port-4599
Port citra-emu/citra#4244 and citra-emu/citra#4599: Changes to BitField
2019-03-20 23:44:20 -04:00
bunnei
52f36ea1c7 Merge pull request #2262 from lioncash/enum
file_sys/content_archive: Amend name of Data_Unknown5 enum entry
2019-03-20 23:13:32 -04:00
bunnei
b72664abfd Merge pull request #2273 from lioncash/guard
common/uint128: Add missing header guard
2019-03-20 23:13:06 -04:00
bunnei
2117edd0f8 memory_manager: Cleanup FindFreeRegion. 2019-03-20 23:12:28 -04:00
bunnei
5a5fccaa23 memory_manager: Use Common::AlignUp in public interface as needed. 2019-03-20 22:58:49 -04:00
Lioncash
f2c41ba256 common/uint128: Add missing header guard 2019-03-20 22:39:00 -04:00
Lioncash
b0d70096a1 common/uint128: Add missing top-file source text 2019-03-20 22:38:25 -04:00
bunnei
e76f442a0e Merge pull request #2268 from lioncash/codeset
core/kernel: Migrate CodeSet to its own source files
2019-03-20 22:37:58 -04:00
bunnei
72837e4b3d memory_manager: Bug fixes and further cleanup. 2019-03-20 22:36:03 -04:00
bunnei
3ae0de9b53 memory: Check that core is powered on before attempting to use GPU.
- GPU will be released on shutdown, before pages are unmapped.
- On subsequent runs, current_page_table will not be nullptr, but the GPU might not be valid yet.
2019-03-20 22:36:03 -04:00
bunnei
19330f45d3 maxwell_dma: Check for valid source in destination before copy.
- Avoid a crash in Octopath Traveler.
2019-03-20 22:36:03 -04:00
bunnei
197dcf0b5e memory_manager: Add protections for invalid GPU addresses.
- Avoid a crash in Xenoblade Chronicles 2.
2019-03-20 22:36:03 -04:00
bunnei
21eb4cfa7f gl_rasterizer_cache: Check that backing memory is valid before creating a surface.
- Fixes a crash in Puyo Puyo Tetris.
2019-03-20 22:36:02 -04:00
bunnei
22d3dfbcd4 gpu: Rewrite virtual memory manager using PageTable. 2019-03-20 22:36:02 -04:00
bunnei
241563d15c gpu: Move GPUVAddr definition to common_types. 2019-03-20 22:36:02 -04:00
bunnei
43b83d6b6a Merge pull request #2272 from lioncash/boost
common/CMakeLists: Amend boost dependency
2019-03-20 22:35:36 -04:00
Lioncash
1b6adb5308 common/CMakeLists: Amend boost dependency
When #2247 was created, thread_queue_list.h was the only user of
boost-related code, however #2252 moved the page table struct into
common, which makes use of Boost.ICL, so we need to add the dependency
to the common library's link interface again.
2019-03-20 21:42:13 -04:00
bunnei
872a7bee72 Merge pull request #2267 from FernandoS27/fix-2238
Fix crash caused by #2238.
2019-03-20 21:36:55 -04:00
bunnei
e8ff8a66b0 Merge pull request #2247 from lioncash/include
common/thread_queue_list: Remove unnecessary dependency on boost
2019-03-20 21:34:12 -04:00
bunnei
723ad4512f Merge pull request #2224 from lioncash/opus
hwopus: Leverage multistream API for decoding regular Opus packets
2019-03-20 21:33:37 -04:00
bunnei
c1409602da Merge pull request #2239 from FearlessTobi/port-4684
Port citra-emu/citra#4684: "frontend: qt: fix a freeze where if you click on entry in the game list too fast, citra will hang"
2019-03-20 21:33:05 -04:00
Lioncash
8f454a5c68 kernel/process: Make MapSegment lambda reference parameter const
The segment itself isn't actually modified.
2019-03-20 13:07:09 -04:00
Lioncash
1b6bd9d6df kernel: Move CodeSet structure to its own source files
Given this is utilized by the loaders, this allows avoiding inclusion of
the kernel process definitions where avoidable.

This also keeps the loading format for all executable data separate from
the kernel objects.
2019-03-20 13:07:04 -04:00
bunnei
9d11303a36 Merge pull request #2264 from lioncash/linker
core/loader: Remove vestigial Linker class
2019-03-20 12:31:00 -04:00
bunnei
adf07cbe17 Merge pull request #2263 from FearlessTobi/port-4697
Port citra-emu/citra#4697: "Fix getopt on systems where char is unsigned by default"
2019-03-19 23:27:17 -04:00
Fernando Sahmkow
8a320a6ee2 Fix crash caused by 2238. 2019-03-19 22:45:34 -04:00
Lioncash
1342c53e27 loader: Remove Linker class
Given the class is now currently unused, it can be removed.
2019-03-19 21:32:02 -04:00
Lioncash
ab00552118 loader: Remove Linker inheritance from NRO and NSO loaders
Neither the NRO nor NSO loaders actually make use of the functions or
members provided by the Linker interface, so we can just remove the
inheritance altogether.
2019-03-19 21:31:59 -04:00
xperia64
ec74a4fd4a Fix getopt on systems where char is unsigned by default 2019-03-19 23:53:40 +01:00
Lioncash
b8c7072206 file_sys/content_archive: Amend name of Data_Unknown5 enum entry
While we're at it, give each entry some documentation.
2019-03-19 15:58:38 -04:00
bunnei
746167f11a Merge pull request #2258 from lioncash/am
service/am: Supply remaining missing IAudioController functions
2019-03-18 22:20:36 -04:00
Lioncash
eb335f51ca input_common/sdl: Correct return values within implementations of GetPollers()
In both cases, we weren't actually returning anything, which is
undefined behavior.
2019-03-18 11:40:38 -04:00
Lioncash
874826b6dd input_common/sdl: Use a type alias to shorten declaration of GetPollers
Just makes the definitions a little bit more tidy.
2019-03-18 11:40:35 -04:00
bunnei
8dc2f01eae Merge pull request #2259 from lioncash/fsp
fsp_srv: Unstub SetCurrentProcess
2019-03-18 11:13:52 -04:00
bunnei
e05136f70b Merge pull request #2254 from lioncash/redundant
input_common/sdl_impl: Minor cleanup in SDLState constructor
2019-03-18 11:13:20 -04:00
Lioncash
9f092554c2 fsp_srv: Unstub SetCurrentProcess
This just acts as a basic setter for a given PID value and performs no
further checking, so we can just store the passed in value.
2019-03-18 10:38:01 -04:00
Lioncash
26b809549b service/am: Add basic implementation of ChangeMainAppletMasterVolume
All this does is supply a new volume level and a fade time in
nanoseconds for the volume transition to occur within.
2019-03-18 09:18:34 -04:00
Lioncash
c07ebeac19 service/am: Unstub SetTransparentVolumeRate()
Like the other volume setter, this mainly just sets a data member within
the service, nothing too special.
2019-03-18 09:18:34 -04:00
Lioncash
ecd3afdc8e service/am: Unstub SetExpectedMasterVolume()
This function passes in the desired main applet and library applet
volume levels. We can then just pass those values back within the
relevant volume getter functions, allowing us to unstub those as well.

The initial values for the library and main applet volumes differ. The
main applet volume is 0.25 by default, while the library applet volume
is initialized to 1.0 by default in the services themselves.
2019-03-18 09:18:34 -04:00
bunnei
30f228a8c9 Merge pull request #2238 from lioncash/thread
kernel/thread: Amend conditional test and assignment within UpdatePriority()
2019-03-17 22:27:33 -04:00
Mat M
c57d8eb66c Merge pull request #2257 from MerryMage/boost-1.66
CMakeLists: Raise minimum Boost requirement to 1.66.0
2019-03-17 20:21:11 -04:00
MerryMage
51f609fee7 CMakeLists: Raise minimum Boost requirement to 1.66.0
Required due to bugfix in boost for changed template resolving rules in GCC 7.3.0 in C++17 mode
2019-03-17 23:04:03 +00:00
bunnei
57ca1e3e69 Merge pull request #2252 from bunnei/move-page-table
core: Move PageTable struct into Common.
2019-03-17 14:42:57 -04:00
Lioncash
114060fd87 input_common/sdl_impl: Make lambda capture more specific in SDLState constructor
We don't need to universally capture by reference. We specifically just
need to capture the this pointer.
2019-03-17 04:02:52 -04:00
Lioncash
d74aa13bd3 input_common/sdl_impl: Remove unnecessary std::chrono::duration construction
Specifying the time unit itself is sufficient here.
2019-03-17 04:02:52 -04:00
Lioncash
834d3fe336 input_common/sdl_impl: Remove unused variable in SDLState constructor 2019-03-17 04:02:48 -04:00
bunnei
41566c615b Merge pull request #2251 from bunnei/skip-zero-flush
gl_rasterizer: Skip zero addr/sized regions on flush/invalidate.
2019-03-17 01:40:32 -04:00
bunnei
9ad3b01d30 Merge pull request #2249 from lioncash/ipc
ipc_helpers: Allow pushing and popping floating-point values
2019-03-16 22:22:03 -04:00
bunnei
fd0533ef4c Merge pull request #2246 from lioncash/opus-fork
externals: Update opus to latest master
2019-03-16 22:12:59 -04:00
bunnei
ed7a1e1443 Merge pull request #2245 from lioncash/unused-def
kernel/thread: Actually remove the definition of ExitCurrentThread()
2019-03-16 22:12:40 -04:00
bunnei
93da8e0abf core: Move PageTable struct into Common. 2019-03-16 22:05:40 -04:00
bunnei
032e4c4ca3 gl_rasterizer: Skip zero addr/sized regions on flush/invalidate. 2019-03-16 22:03:19 -04:00
bunnei
2392e146b0 Merge pull request #2244 from bunnei/gpu-mem-refactor
video_core: Refactor to use MemoryManager interface for all memory access.
2019-03-16 21:59:45 -04:00
bunnei
bf41132aa9 Merge pull request #2243 from bunnei/mem-simplify-cache
memory: Simplify rasterizer cache operations.
2019-03-16 21:59:30 -04:00
bunnei
059465d496 Merge pull request #2129 from FernandoS27/cntpct
Correct CNTPCT from using CPU Cycles to using Clock Cycles
2019-03-16 21:58:59 -04:00
Lioncash
64444ff481 ipc_helpers: Allow pushing and popping floating-point values
Certain values that are passed through the IPC buffer are actually
floating point values, not solely integral values.
2019-03-16 14:05:03 -04:00
Lioncash
f71c598907 common/thread_queue_list: Remove unnecessary dependency on boost
We really don't need to pull in several headers of boost related
machinery just to perform the erase-remove idiom (particularly with
C++20 around the corner, which adds universal container std::erase and
std::erase_if, which we can just use instead).

With this, we don't need to link in anything boost-related into common.
2019-03-16 05:01:39 -04:00
Lioncash
6abc56672c externals: Update opus to latest master
Prevents yuzu from getting 2000+ warnings in MSVC in a future change.
2019-03-16 04:10:41 -04:00
Lioncash
99f982dce2 kernel/thread: Actually remove the definition of ExitCurrentThread()
This was intended to be removed in
51d7f6bffc, but I guess I forgot to
actually save the file like a dingus.
2019-03-16 00:51:44 -04:00
bunnei
29c242721a Merge pull request #2241 from lioncash/compile-flags
CMakeLists: Remove now-unnecessary GCC special-casing
2019-03-16 00:43:29 -04:00
bunnei
bdf2da4ee8 Merge pull request #2242 from lioncash/thread-fn
kernel/thread: Remove WaitCurrentThread_Sleep() and ExitCurrentThread()
2019-03-16 00:43:09 -04:00
bunnei
10118c71e0 memory: Simplify rasterizer cache operations. 2019-03-16 00:41:08 -04:00
bunnei
574e89d924 video_core: Refactor to use MemoryManager interface for all memory access.
# Conflicts:
#	src/video_core/engines/kepler_memory.cpp
#	src/video_core/engines/maxwell_3d.cpp
#	src/video_core/morton.cpp
#	src/video_core/morton.h
#	src/video_core/renderer_opengl/gl_global_cache.cpp
#	src/video_core/renderer_opengl/gl_global_cache.h
#	src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
2019-03-16 00:38:48 -04:00
bunnei
47b622825c Merge pull request #2237 from bunnei/cache-host-addr
gpu: Use host address for caching instead of guest address.
2019-03-16 00:05:24 -04:00
Lioncash
51d7f6bffc kernel/thread: Move thread exiting logic from ExitCurrentThread to svcExitThread
Puts the operation on global state in the same places as the rest of the
svc calls.
2019-03-15 23:58:37 -04:00
Lioncash
c892cf01fa kernel/thread: Migrate WaitCurrentThread_Sleep into the Thread interface
Rather than make a global accessor for this sort of thing. We can make
it a part of the thread interface itself. This allows getting rid of a
hidden global accessor in the kernel code.
2019-03-15 23:58:31 -04:00
Lioncash
db47d7e471 kernel/thread: Expand documentation of nominal_priority and current_priority
Aims to disambiguate why each priority instance exists a little bit.
While we're at it, also add an explanatory comment to UpdatePriority().
2019-03-15 23:02:14 -04:00
Lioncash
e0d1f11968 kernel/thread: Make bracing consistent within UpdatePriority() 2019-03-15 23:02:10 -04:00
Lioncash
39483b92b7 kernel/thread: Amend condition within UpdatePriority()
This condition was checking against the nominal thread priority, whereas
the kernel itself checks against the current priority instead. We were
also assigning the nominal priority, when we should be assigning
current_priority, which takes priority inheritance into account.

This can lead to the incorrect priority being assigned to a thread.

Given we recursively update the relevant threads, we don't need to go
through the whole mutex waiter list. This matches what the kernel does
as well (only accessing the first entry within the waiting list).
2019-03-15 23:01:43 -04:00
Lioncash
0b78cfcc53 kernel/thread: Maintain priority ordering of added mutex waiting threads
The kernel keeps the internal waiting list ordered by priority. This is
trivial to do with std::find_if followed by an insertion.
2019-03-15 23:01:39 -04:00
bunnei
06ac6460d3 Merge pull request #2048 from FearlessTobi/port-3924
Port citra-emu/citra#3924: "citra_qt: Settings (configuration) rework"
2019-03-15 22:23:38 -04:00
Lioncash
e5b004e903 CMakeLists: Remove now-unnecessary GCC special-casing
This issue has since been fixed in newer versions of Boost, so we don't
need to worry about this anymore.
2019-03-15 20:49:58 -04:00
liushuyu
59f16f2e02 frontend: qt: fix a freeze where if you click on entry in the game list too fast, citra will hang 2019-03-15 16:10:21 +01:00
bunnei
2eaf6c41a4 gpu: Use host address for caching instead of guest address. 2019-03-14 22:34:42 -04:00
bunnei
84d3cdf7d7 Merge pull request #2233 from ReinUsesLisp/morton-cleanup
video_core/morton: Miscellaneous changes
2019-03-14 21:23:12 -04:00
bunnei
6788ebffc8 Merge pull request #2229 from ReinUsesLisp/vk-sampler-cache
vk_sampler_cache: Implement a sampler cache
2019-03-14 21:22:34 -04:00
ReinUsesLisp
ffe2e50458 video_core/morton: Use enum to describe MortonCopyPixels128 mode 2019-03-13 16:35:21 -03:00
ReinUsesLisp
6ed6129b4f video_core/morton: Remove unused parameter in MortonSwizzle 2019-03-13 16:35:10 -03:00
ReinUsesLisp
9030a8259f video_core/morton: Remove clang-format off when it's not needed 2019-03-13 16:16:45 -03:00
ReinUsesLisp
fdf76a25ab video_core/morton: Remove unused functions 2019-03-13 16:15:54 -03:00
Mat M
a3734d7e31 vk_sampler_cache: Use operator== instead of memcmp
Co-Authored-By: ReinUsesLisp <reinuseslisp@airmail.cc>
2019-03-12 21:05:36 -03:00
ReinUsesLisp
aa59d77c3b vk_sampler_cache: Implement a sampler cache 2019-03-12 20:20:57 -03:00
ReinUsesLisp
8ebeb9ade2 video_core/texture: Add a raw representation of TSCEntry 2019-03-12 16:56:29 -03:00
Lioncash
7ad3d4e49c hwopus: Leverage multistream API for decoding regular Opus packets
After doing a little more reading up on the Opus codec, it turns out
that the multistream API that is part of libopus can handle regular
packets. Regular packets are just a degenerate case of multistream Opus
packets, and all that's necessary is to pass the number of streams as 1
and provide a basic channel mapping, then everything works fine for
that case.

This allows us to get rid of the need to use both APIs in the future
when implementing multistream variants in a follow-up PR, greatly
simplifying the code that needs to be written.
2019-03-11 07:06:18 -04:00
zhupengfei
39e895c5ff citra_qt: Settings (configuration) rework 2019-03-07 16:55:50 +01:00
Zach Hilman
52ac6419da vm_manager: Remove cheat-specific ranges from VMManager 2019-03-05 10:09:36 -05:00
Zach Hilman
7053546687 core: Add support for registering and controlling ownership of CheatEngine 2019-03-04 18:41:29 -05:00
Zach Hilman
769b346682 cheat_engine: Add parser and interpreter for game cheats 2019-03-04 18:39:58 -05:00
Zach Hilman
c100a4b8d4 loader/nso: Set main code region in VMManager
For rom directories (and by extension, XCI/NSP/NAX/NCA) this is for the NSO with name 'main', for regular NSOs, this is the NSO.
2019-03-04 18:39:58 -05:00
Zach Hilman
b952a30555 vm_manager: Add support for storing and getting main code region
Used as root for one region of cheats, set by loader
2019-03-04 18:39:58 -05:00
Zach Hilman
4495bf5706 patch_manager: Display cheats in game list add-ons 2019-03-04 18:39:57 -05:00
Zach Hilman
c5091bfe00 patch_manager: Add support for loading cheats lists
Uses load/<title_id>/<mod_name>/cheats as root dir, file name is all upper or lower hex first 8 bytes build ID.
2019-03-04 18:39:57 -05:00
Zach Hilman
9d1ab766a0 controllers/npad: Add accessor for current press state
Allows frontend/features to access pressed buttons as conveniently as possible
2019-03-04 18:39:57 -05:00
Fernando Sahmkow
a8d4927e29 Corrections, documenting and fixes. 2019-02-16 16:52:24 -04:00
Fernando Sahmkow
ecccfe0337 Use u128 on Clock Cycles calculation. 2019-02-15 22:57:16 -04:00
Fernando Sahmkow
3ea48e8ebe Implement 128 bits Unsigned Integer Multiplication and Division. 2019-02-15 22:55:31 -04:00
Fernando Sahmkow
5b7ec71fb7 Correct CNTPCT to use Clock Cycles instead of Cpu Cycles. 2019-02-15 22:55:29 -04:00
fearlessTobi
efd83570bd Make bitfield assignment operator public
This change needs to be made to get the code compiling again. It was suggested after a conversation with Lioncash.

The conversation can be seen here: https://user-images.githubusercontent.com/20753089/45064197-b6107800-b0b2-11e8-9db8-f696299fb86a.PNG
2019-02-13 21:15:15 +01:00
Weiyi Wang
89abef3518 remove all occurrences of specifying endianness inside BitField
This commit is automatically generated by a command in zsh:
sed -i -- 's/BitField<\(.*\)_le>/BitField<\1>/g' **/*(D.)

BitField is now aware of endianness and defaults to little endian. It expects a value representation type without storage specification for its template parameter.
2019-02-06 18:13:45 +01:00
Weiyi Wang
6b81ceb060 common/bitfield: make it endianness-aware 2019-02-06 17:29:39 +01:00
Weiyi Wang
71530781f3 common/swap: remove default value for swap type internal storage
This is a compromise for the swap type being used in a union. A union has a deleted default constructor if it has at least one variant member with a non-trivial default constructor, and no variant member of T has a default member initializer. In the use case of BitField, all variant members will be the swap type on endianness mismatch, which would all have a non-trivial default constructor if a default value is specified, and none of them can have a member initializer.
2019-02-06 17:24:27 +01:00
Weiyi Wang
6734c64976 common/swap: use template and tag for LE/BE specification
The tag can be useful for other type-generic templates like BitFields to forward the endianness specification
2019-02-06 17:24:13 +01:00
Weiyi Wang
94bc48dd78 common/swap: add swap template for enum 2019-02-06 17:21:15 +01:00
116 changed files with 3536 additions and 1931 deletions

View File

@@ -1,5 +1,6 @@
#!/bin/sh -ex
brew update
brew install dylibbundler p7zip qt5 sdl2 ccache
brew install p7zip qt5 sdl2 ccache
brew outdated cmake || brew upgrade cmake
pip3 install macpack

View File

@@ -11,92 +11,19 @@ mkdir "$REV_NAME"
cp build/bin/yuzu-cmd "$REV_NAME"
cp -r build/bin/yuzu.app "$REV_NAME"
# move qt libs into app bundle for deployment
$(brew --prefix)/opt/qt5/bin/macdeployqt "${REV_NAME}/yuzu.app"
# move libs into folder for deployment
macpack "${REV_NAME}/yuzu.app/Contents/MacOS/yuzu" -d "../Frameworks"
# move qt frameworks into app bundle for deployment
$(brew --prefix)/opt/qt5/bin/macdeployqt "${REV_NAME}/yuzu.app" -executable="${REV_NAME}/yuzu.app/Contents/MacOS/yuzu"
# move SDL2 libs into folder for deployment
dylibbundler -b -x "${REV_NAME}/yuzu-cmd" -cd -d "${REV_NAME}/libs" -p "@executable_path/libs/"
# Make the changes to make the yuzu app standalone (i.e. not dependent on the current brew installation).
# To do this, the absolute references to each and every QT framework must be re-written to point to the local frameworks
# (in the Contents/Frameworks folder).
# The "install_name_tool" is used to do so.
# Coreutils is a hack to coerce Homebrew to point to the absolute Cellar path (symlink dereferenced). i.e:
# ls -l /usr/local/opt/qt5:: /usr/local/opt/qt5 -> ../Cellar/qt5/5.6.1-1
# grealpath ../Cellar/qt5/5.6.1-1:: /usr/local/Cellar/qt5/5.6.1-1
brew install coreutils || brew upgrade coreutils || true
REV_NAME_ALT=$REV_NAME/
# grealpath is located in coreutils, there is no "realpath" for OS X :(
QT_BREWS_PATH=$(grealpath "$(brew --prefix qt5)")
BREW_PATH=$(brew --prefix)
QT_VERSION_NUM=5
$BREW_PATH/opt/qt5/bin/macdeployqt "${REV_NAME_ALT}yuzu.app" \
-executable="${REV_NAME_ALT}yuzu.app/Contents/MacOS/yuzu"
# These are the files that macdeployqt packed into Contents/Frameworks/ - we don't want those, so we replace them.
declare -a macos_libs=("QtCore" "QtWidgets" "QtGui" "QtOpenGL" "QtPrintSupport")
for macos_lib in "${macos_libs[@]}"
do
SC_FRAMEWORK_PART=$macos_lib.framework/Versions/$QT_VERSION_NUM/$macos_lib
# Replace macdeployqt versions of the Frameworks with our own (from /usr/local/opt/qt5/lib/)
cp "$BREW_PATH/opt/qt5/lib/$SC_FRAMEWORK_PART" "${REV_NAME_ALT}yuzu.app/Contents/Frameworks/$SC_FRAMEWORK_PART"
# Replace references within the embedded Framework files with "internal" versions.
for macos_lib2 in "${macos_libs[@]}"
do
# Since brew references both the non-symlinked and symlink paths of QT5, it needs to be duplicated.
# /usr/local/Cellar/qt5/5.6.1-1/lib and /usr/local/opt/qt5/lib both resolve to the same files.
# So the two lines below are effectively duplicates when resolved as a path, but as strings, they aren't.
RM_FRAMEWORK_PART=$macos_lib2.framework/Versions/$QT_VERSION_NUM/$macos_lib2
install_name_tool -change \
$QT_BREWS_PATH/lib/$RM_FRAMEWORK_PART \
@executable_path/../Frameworks/$RM_FRAMEWORK_PART \
"${REV_NAME_ALT}yuzu.app/Contents/Frameworks/$SC_FRAMEWORK_PART"
install_name_tool -change \
"$BREW_PATH/opt/qt5/lib/$RM_FRAMEWORK_PART" \
@executable_path/../Frameworks/$RM_FRAMEWORK_PART \
"${REV_NAME_ALT}yuzu.app/Contents/Frameworks/$SC_FRAMEWORK_PART"
done
done
# Handles `This application failed to start because it could not find or load the Qt platform plugin "cocoa"`
# Which manifests itself as:
# "Exception Type: EXC_CRASH (SIGABRT) | Exception Codes: 0x0000000000000000, 0x0000000000000000 | Exception Note: EXC_CORPSE_NOTIFY"
# There may be more dylibs needed to be fixed...
declare -a macos_plugins=("Plugins/platforms/libqcocoa.dylib")
for macos_lib in "${macos_plugins[@]}"
do
install_name_tool -id @executable_path/../$macos_lib "${REV_NAME_ALT}yuzu.app/Contents/$macos_lib"
for macos_lib2 in "${macos_libs[@]}"
do
RM_FRAMEWORK_PART=$macos_lib2.framework/Versions/$QT_VERSION_NUM/$macos_lib2
install_name_tool -change \
$QT_BREWS_PATH/lib/$RM_FRAMEWORK_PART \
@executable_path/../Frameworks/$RM_FRAMEWORK_PART \
"${REV_NAME_ALT}yuzu.app/Contents/$macos_lib"
install_name_tool -change \
"$BREW_PATH/opt/qt5/lib/$RM_FRAMEWORK_PART" \
@executable_path/../Frameworks/$RM_FRAMEWORK_PART \
"${REV_NAME_ALT}yuzu.app/Contents/$macos_lib"
done
done
for macos_lib in "${macos_libs[@]}"
do
# Debugging info for Travis-CI
otool -L "${REV_NAME_ALT}yuzu.app/Contents/Frameworks/$macos_lib.framework/Versions/$QT_VERSION_NUM/$macos_lib"
done
# move libs into folder for deployment
macpack "${REV_NAME}/yuzu-cmd" -d "libs"
# Make the yuzu.app application launch a debugging terminal.
# Store away the actual binary
mv ${REV_NAME_ALT}yuzu.app/Contents/MacOS/yuzu ${REV_NAME_ALT}yuzu.app/Contents/MacOS/yuzu-bin
mv ${REV_NAME}/yuzu.app/Contents/MacOS/yuzu ${REV_NAME}/yuzu.app/Contents/MacOS/yuzu-bin
cat > ${REV_NAME_ALT}yuzu.app/Contents/MacOS/yuzu <<EOL
cat > ${REV_NAME}/yuzu.app/Contents/MacOS/yuzu <<EOL
#!/usr/bin/env bash
cd "\`dirname "\$0"\`"
chmod +x yuzu-bin
@@ -105,6 +32,9 @@ EOL
# Content that will serve as the launching script for yuzu (within the .app folder)
# Make the launching script executable
chmod +x ${REV_NAME_ALT}yuzu.app/Contents/MacOS/yuzu
chmod +x ${REV_NAME}/yuzu.app/Contents/MacOS/yuzu
# Verify loader instructions
find "$REV_NAME" -exec otool -L {} \;
. .travis/common/post-upload.sh

View File

@@ -163,12 +163,6 @@ else()
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE)
endif()
# Fix GCC C++17 and Boost.ICL incompatibility (needed to build dynarmic)
# See https://bugzilla.redhat.com/show_bug.cgi?id=1485641#c1
if (CMAKE_COMPILER_IS_GNUCC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-new-ttp-matching")
endif()
# Set file offset size to 64 bits.
#
# On modern Unixes, this is typically already the case. The lone exception is
@@ -185,9 +179,9 @@ set_property(DIRECTORY APPEND PROPERTY
# System imported libraries
# ======================
find_package(Boost 1.63.0 QUIET)
find_package(Boost 1.66.0 QUIET)
if (NOT Boost_FOUND)
message(STATUS "Boost 1.63.0 or newer not found, falling back to externals")
message(STATUS "Boost 1.66.0 or newer not found, falling back to externals")
set(BOOST_ROOT "${PROJECT_SOURCE_DIR}/externals/boost")
set(Boost_NO_SYSTEM_PATHS OFF)

2
externals/opus vendored

View File

@@ -92,10 +92,14 @@ add_library(common STATIC
logging/text_formatter.cpp
logging/text_formatter.h
math_util.h
memory_hook.cpp
memory_hook.h
microprofile.cpp
microprofile.h
microprofileui.h
misc.cpp
page_table.cpp
page_table.h
param_package.cpp
param_package.h
quaternion.h
@@ -114,6 +118,8 @@ add_library(common STATIC
threadsafe_queue.h
timer.cpp
timer.h
uint128.cpp
uint128.h
vector_math.h
web_result.h
)

View File

@@ -34,6 +34,7 @@
#include <limits>
#include <type_traits>
#include "common/common_funcs.h"
#include "common/swap.h"
/*
* Abstract bitfield class
@@ -108,7 +109,7 @@
* symptoms.
*/
#pragma pack(1)
template <std::size_t Position, std::size_t Bits, typename T>
template <std::size_t Position, std::size_t Bits, typename T, typename EndianTag = LETag>
struct BitField {
private:
// UnderlyingType is T for non-enum types and the underlying type of T if
@@ -121,6 +122,8 @@ private:
// We store the value as the unsigned type to avoid undefined behaviour on value shifting
using StorageType = std::make_unsigned_t<UnderlyingType>;
using StorageTypeWithEndian = typename AddEndian<StorageType, EndianTag>::type;
public:
/// Constants to allow limited introspection of fields if needed
static constexpr std::size_t position = Position;
@@ -170,7 +173,7 @@ public:
}
constexpr FORCE_INLINE void Assign(const T& value) {
storage = (storage & ~mask) | FormatValue(value);
storage = (static_cast<StorageType>(storage) & ~mask) | FormatValue(value);
}
constexpr T Value() const {
@@ -182,7 +185,7 @@ public:
}
private:
StorageType storage;
StorageTypeWithEndian storage;
static_assert(bits + position <= 8 * sizeof(T), "Bitfield out of range");
@@ -193,3 +196,6 @@ private:
static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable in a BitField");
};
#pragma pack()
template <std::size_t Position, std::size_t Bits, typename T>
using BitFieldBE = BitField<Position, Bits, T, BETag>;

View File

@@ -40,10 +40,9 @@ using s64 = std::int64_t; ///< 64-bit signed int
using f32 = float; ///< 32-bit floating point
using f64 = double; ///< 64-bit floating point
// TODO: It would be nice to eventually replace these with strong types that prevent accidental
// conversion between each other.
using VAddr = u64; ///< Represents a pointer in the userspace virtual address space.
using PAddr = u64; ///< Represents a pointer in the ARM11 physical address space.
using VAddr = u64; ///< Represents a pointer in the userspace virtual address space.
using PAddr = u64; ///< Represents a pointer in the ARM11 physical address space.
using GPUVAddr = u64; ///< Represents a pointer in the GPU virtual address space.
using u128 = std::array<std::uint64_t, 2>;
static_assert(sizeof(u128) == 16, "u128 must be 128 bits wide");

View File

@@ -2,10 +2,10 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/memory_hook.h"
#include "common/memory_hook.h"
namespace Memory {
namespace Common {
MemoryHook::~MemoryHook() = default;
} // namespace Memory
} // namespace Common

View File

@@ -9,7 +9,7 @@
#include "common/common_types.h"
namespace Memory {
namespace Common {
/**
* Memory hooks have two purposes:
@@ -44,4 +44,4 @@ public:
};
using MemoryHookPointer = std::shared_ptr<MemoryHook>;
} // namespace Memory
} // namespace Common

31
src/common/page_table.cpp Normal file
View File

@@ -0,0 +1,31 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/page_table.h"
namespace Common {
/// Constructs a page table. The only work done here is storing the page size, given as a number
/// of bits, in the page_size_in_bits member, which Resize() reads to compute the entry count.
PageTable::PageTable(std::size_t page_size_in_bits) : page_size_in_bits{page_size_in_bits} {}
/// Defaulted destructor, defined out of line in this source file.
PageTable::~PageTable() = default;
void PageTable::Resize(std::size_t address_space_width_in_bits) {
const std::size_t num_page_table_entries = 1ULL
<< (address_space_width_in_bits - page_size_in_bits);
pointers.resize(num_page_table_entries);
attributes.resize(num_page_table_entries);
backing_addr.resize(num_page_table_entries);
// The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
// vector size is subsequently decreased (via resize), the vector might not automatically
// actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for
// 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use.
pointers.shrink_to_fit();
attributes.shrink_to_fit();
backing_addr.shrink_to_fit();
}
} // namespace Common

84
src/common/page_table.h Normal file
View File

@@ -0,0 +1,84 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vector>
#include <boost/icl/interval_map.hpp>
#include "common/common_types.h"
#include "common/memory_hook.h"
namespace Common {
/// Per-page attribute stored in PageTable::attributes; one byte per page (underlying type u8).
enum class PageType : u8 {
/// Page is unmapped and should cause an access error.
Unmapped,
/// Page is mapped to regular memory. This is the only type you can get pointers to.
Memory,
/// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
/// invalidation.
RasterizerCachedMemory,
/// Page is mapped to an I/O region. Writing and reading to this page is handled by functions.
Special,
/// Page is allocated for use.
Allocated,
};
/// Describes one handler attached to a special region: what kind of region it is and which
/// memory hook services it. Instances are stored in a std::set (see PageTable::special_regions),
/// hence the ordering and equality operators below.
struct SpecialRegion {
/// Kind of special region this entry describes.
enum class Type {
DebugHook,
IODevice,
} type;
/// Shared pointer to the hook object associated with this region.
MemoryHookPointer handler;
/// Lexicographic ordering on (type, handler); the handler is compared as a pointer.
bool operator<(const SpecialRegion& other) const {
return std::tie(type, handler) < std::tie(other.type, other.handler);
}
/// Two regions are equal when both the type and the handler pointer match.
bool operator==(const SpecialRegion& other) const {
return std::tie(type, handler) == std::tie(other.type, other.handler);
}
};
/**
* A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
* mimics the way a real CPU page table works.
*/
struct PageTable {
/// @param page_size_in_bits Size of one page expressed as a number of address bits.
explicit PageTable(std::size_t page_size_in_bits);
~PageTable();
/**
* Resizes the page table to be able to accommodate enough pages within
* a given address space.
*
* @param address_space_width_in_bits The address size width in bits.
*/
void Resize(std::size_t address_space_width_in_bits);
/**
* Vector of memory pointers backing each page. An entry can only be non-null if the
* corresponding entry in the `attributes` vector is of type `Memory`.
*/
std::vector<u8*> pointers;
/**
* Contains MMIO handlers that back memory regions whose entries in the `attributes` vector are
* of type `Special`.
*/
boost::icl::interval_map<u64, std::set<SpecialRegion>> special_regions;
/**
* Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
* the corresponding entry in `pointers` MUST be set to null.
*/
std::vector<PageType> attributes;
/**
* One u64 per page, resized together with `pointers` and `attributes`.
* NOTE(review): presumably the backing address of each page; confirm against the users of
* this table.
*/
std::vector<u64> backing_addr;
/// Page size expressed as a number of bits; set once by the constructor.
const std::size_t page_size_in_bits{};
};
} // namespace Common

View File

@@ -17,6 +17,8 @@
#pragma once
#include <type_traits>
#if defined(_MSC_VER)
#include <cstdlib>
#elif defined(__linux__)
@@ -170,7 +172,7 @@ struct swap_struct_t {
using swapped_t = swap_struct_t;
protected:
T value = T();
T value;
static T swap(T v) {
return F::swap(v);
@@ -605,52 +607,154 @@ struct swap_double_t {
}
};
/// Wrapper that holds an enum value in byte-swapped form and swaps it back whenever it is read.
/// T must be an enum type (enforced by the static_assert below).
template <typename T>
struct swap_enum_t {
static_assert(std::is_enum_v<T>);
using base = std::underlying_type_t<T>;
public:
swap_enum_t() = default;
/// Stores the byte-swapped representation of `v`.
swap_enum_t(const T& v) : value(swap(v)) {}
/// Replaces the stored value with the byte-swapped representation of `v`.
swap_enum_t& operator=(const T& v) {
value = swap(v);
return *this;
}
/// Implicitly yields the enum value with its bytes swapped back.
operator T() const {
return swap(value);
}
/// Explicitly yields the swapped-back value as the enum's underlying integer type.
explicit operator base() const {
return static_cast<base>(swap(value));
}
protected:
/// Stored in swapped byte order (every write goes through swap()).
T value{};
// Selects the swap helper matching the underlying type. Only the 16/32/64-bit signed and
// unsigned types are listed; any other underlying type yields `void`, so swap() below cannot
// be instantiated for such enums.
// clang-format off
using swap_t = std::conditional_t<
std::is_same_v<base, u16>, swap_16_t<u16>, std::conditional_t<
std::is_same_v<base, s16>, swap_16_t<s16>, std::conditional_t<
std::is_same_v<base, u32>, swap_32_t<u32>, std::conditional_t<
std::is_same_v<base, s32>, swap_32_t<s32>, std::conditional_t<
std::is_same_v<base, u64>, swap_64_t<u64>, std::conditional_t<
std::is_same_v<base, s64>, swap_64_t<s64>, void>>>>>>;
// clang-format on
/// Converts to the underlying integer, swaps it with the selected helper, and converts back.
static T swap(T x) {
return static_cast<T>(swap_t::swap(static_cast<base>(x)));
}
};
struct SwapTag {}; // Use the different endianness from the system
struct KeepTag {}; // Use the same endianness as the system
/// Maps a value type T and an endianness tag to the type that should be used to store it.
/// Only declared here; the specializations below provide the nested `type` alias.
template <typename T, typename Tag>
struct AddEndian;
// KeepTag specializations
/// Same endianness as the system: the type is used unchanged.
template <typename T>
struct AddEndian<T, KeepTag> {
using type = T;
};
// SwapTag specializations
/// Single-byte types map to themselves; no swapping wrapper is used.
template <>
struct AddEndian<u8, SwapTag> {
using type = u8;
};
template <>
struct AddEndian<u16, SwapTag> {
using type = swap_struct_t<u16, swap_16_t<u16>>;
};
template <>
struct AddEndian<u32, SwapTag> {
using type = swap_struct_t<u32, swap_32_t<u32>>;
};
template <>
struct AddEndian<u64, SwapTag> {
using type = swap_struct_t<u64, swap_64_t<u64>>;
};
/// Single-byte types map to themselves; no swapping wrapper is used.
template <>
struct AddEndian<s8, SwapTag> {
using type = s8;
};
template <>
struct AddEndian<s16, SwapTag> {
using type = swap_struct_t<s16, swap_16_t<s16>>;
};
template <>
struct AddEndian<s32, SwapTag> {
using type = swap_struct_t<s32, swap_32_t<s32>>;
};
template <>
struct AddEndian<s64, SwapTag> {
using type = swap_struct_t<s64, swap_64_t<s64>>;
};
template <>
struct AddEndian<float, SwapTag> {
using type = swap_struct_t<float, swap_float_t<float>>;
};
template <>
struct AddEndian<double, SwapTag> {
using type = swap_struct_t<double, swap_double_t<double>>;
};
/// Fallback for every T without an explicit specialization above: T must be an enum and is
/// wrapped in swap_enum_t.
template <typename T>
struct AddEndian<T, SwapTag> {
static_assert(std::is_enum_v<T>);
using type = swap_enum_t<T>;
};
// Alias LETag/BETag as KeepTag/SwapTag depending on the system
#if COMMON_LITTLE_ENDIAN
using u16_le = u16;
using u32_le = u32;
using u64_le = u64;
using s16_le = s16;
using s32_le = s32;
using s64_le = s64;
using LETag = KeepTag;
using BETag = SwapTag;
using float_le = float;
using double_le = double;
using u64_be = swap_struct_t<u64, swap_64_t<u64>>;
using s64_be = swap_struct_t<s64, swap_64_t<s64>>;
using u32_be = swap_struct_t<u32, swap_32_t<u32>>;
using s32_be = swap_struct_t<s32, swap_32_t<s32>>;
using u16_be = swap_struct_t<u16, swap_16_t<u16>>;
using s16_be = swap_struct_t<s16, swap_16_t<s16>>;
using float_be = swap_struct_t<float, swap_float_t<float>>;
using double_be = swap_struct_t<double, swap_double_t<double>>;
#else
using u64_le = swap_struct_t<u64, swap_64_t<u64>>;
using s64_le = swap_struct_t<s64, swap_64_t<s64>>;
using u32_le = swap_struct_t<u32, swap_32_t<u32>>;
using s32_le = swap_struct_t<s32, swap_32_t<s32>>;
using u16_le = swap_struct_t<u16, swap_16_t<u16>>;
using s16_le = swap_struct_t<s16, swap_16_t<s16>>;
using float_le = swap_struct_t<float, swap_float_t<float>>;
using double_le = swap_struct_t<double, swap_double_t<double>>;
using u16_be = u16;
using u32_be = u32;
using u64_be = u64;
using s16_be = s16;
using s32_be = s32;
using s64_be = s64;
using float_be = float;
using double_be = double;
using BETag = KeepTag;
using LETag = SwapTag;
#endif
// Aliases for LE types
using u16_le = AddEndian<u16, LETag>::type;
using u32_le = AddEndian<u32, LETag>::type;
using u64_le = AddEndian<u64, LETag>::type;
using s16_le = AddEndian<s16, LETag>::type;
using s32_le = AddEndian<s32, LETag>::type;
using s64_le = AddEndian<s64, LETag>::type;
template <typename T>
using enum_le = std::enable_if_t<std::is_enum_v<T>, typename AddEndian<T, LETag>::type>;
using float_le = AddEndian<float, LETag>::type;
using double_le = AddEndian<double, LETag>::type;
// Aliases for BE types
using u16_be = AddEndian<u16, BETag>::type;
using u32_be = AddEndian<u32, BETag>::type;
using u64_be = AddEndian<u64, BETag>::type;
using s16_be = AddEndian<s16, BETag>::type;
using s32_be = AddEndian<s32, BETag>::type;
using s64_be = AddEndian<s64, BETag>::type;
template <typename T>
using enum_be = std::enable_if_t<std::is_enum_v<T>, typename AddEndian<T, BETag>::type>;
using float_be = AddEndian<float, BETag>::type;
using double_be = AddEndian<double, BETag>::type;

View File

@@ -6,7 +6,6 @@
#include <array>
#include <deque>
#include <boost/range/algorithm_ext/erase.hpp>
namespace Common {
@@ -111,8 +110,9 @@ struct ThreadQueueList {
}
void remove(Priority priority, const T& thread_id) {
Queue* cur = &queues[priority];
boost::remove_erase(cur->data, thread_id);
Queue* const cur = &queues[priority];
const auto iter = std::remove(cur->data.begin(), cur->data.end(), thread_id);
cur->data.erase(iter, cur->data.end());
}
void rotate(Priority priority) {

45
src/common/uint128.cpp Normal file
View File

@@ -0,0 +1,45 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#ifdef _MSC_VER
#include <intrin.h>
#pragma intrinsic(_umul128)
#endif
#include <cstring>
#include "common/uint128.h"
namespace Common {
u128 Multiply64Into128(u64 a, u64 b) {
    u128 product;
#ifdef _MSC_VER
    // The intrinsic returns one 64-bit half of the product and stores the other half through
    // the pointer argument.
    product[0] = _umul128(a, b, &product[1]);
#else
    // Widen one operand first so the multiplication itself is carried out in 128 bits.
    const unsigned __int128 wide = static_cast<unsigned __int128>(a) * b;
    // Copy the object representation of the 128-bit value straight into the two-element array.
    std::memcpy(&product, &wide, sizeof(u128));
#endif
    return product;
}
std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
    const u64 low = dividend[0];
    const u64 high = dividend[1];

    // Start with the contribution of the low 64 bits.
    u64 quotient = low / divisor;
    u64 rest = low % divisor;
    if (high == 0) {
        return {quotient, rest};
    }

    // high / divisor is ignored on purpose: it would overflow the 64-bit quotient.
    // What is left of the high half, (high % divisor) * 2^64, is divided in two 32-bit steps.
    const u64 upper_step = (high % divisor) << 32;
    quotient += (upper_step / divisor) << 32;

    const u64 lower_step = (upper_step % divisor) << 32;
    quotient += lower_step / divisor;
    rest += lower_step % divisor;

    // Both partial remainders are below divisor, so one correction step is enough.
    if (rest >= divisor) {
        ++quotient;
        rest -= divisor;
    }
    return {quotient, rest};
}
} // namespace Common

19
src/common/uint128.h Normal file
View File

@@ -0,0 +1,19 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <utility>
#include "common/common_types.h"
namespace Common {
// This function multiplies two u64 values and produces a u128 value.
u128 Multiply64Into128(u64 a, u64 b);
// This function divides a u128 by a u32 value and produces two u64 values:
// the result of division and the remainder
std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor);
} // namespace Common

View File

@@ -31,6 +31,8 @@ add_library(core STATIC
file_sys/bis_factory.h
file_sys/card_image.cpp
file_sys/card_image.h
file_sys/cheat_engine.cpp
file_sys/cheat_engine.h
file_sys/content_archive.cpp
file_sys/content_archive.h
file_sys/control_metadata.cpp
@@ -107,6 +109,8 @@ add_library(core STATIC
hle/kernel/client_port.h
hle/kernel/client_session.cpp
hle/kernel/client_session.h
hle/kernel/code_set.cpp
hle/kernel/code_set.h
hle/kernel/errors.h
hle/kernel/handle_table.cpp
hle/kernel/handle_table.h
@@ -419,8 +423,6 @@ add_library(core STATIC
loader/deconstructed_rom_directory.h
loader/elf.cpp
loader/elf.h
loader/linker.cpp
loader/linker.h
loader/loader.cpp
loader/loader.h
loader/nax.cpp
@@ -437,8 +439,6 @@ add_library(core STATIC
loader/xci.h
memory.cpp
memory.h
memory_hook.cpp
memory_hook.h
memory_setup.h
perf_stats.cpp
perf_stats.h

View File

@@ -12,6 +12,7 @@
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/svc.h"
@@ -119,7 +120,7 @@ public:
return std::max(parent.core_timing.GetDowncount(), 0);
}
u64 GetCNTPCT() override {
return parent.core_timing.GetTicks();
return Timing::CpuCyclesToClockCycles(parent.core_timing.GetTicks());
}
ARM_Dynarmic& parent;
@@ -151,7 +152,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
config.tpidr_el0 = &cb->tpidr_el0;
config.dczid_el0 = 4;
config.ctr_el0 = 0x8444c004;
config.cntfrq_el0 = 19200000; // Value from fusee.
config.cntfrq_el0 = Timing::CNTFREQ;
// Unpredictable instructions
config.define_unpredictable_behaviour = true;

View File

@@ -12,7 +12,7 @@
#include "core/arm/exclusive_monitor.h"
#include "core/arm/unicorn/arm_unicorn.h"
namespace Memory {
namespace Common {
struct PageTable;
}
@@ -70,7 +70,7 @@ private:
Timing::CoreTiming& core_timing;
DynarmicExclusiveMonitor& exclusive_monitor;
Memory::PageTable* current_page_table = nullptr;
Common::PageTable* current_page_table = nullptr;
};
class DynarmicExclusiveMonitor final : public ExclusiveMonitor {

View File

@@ -32,6 +32,7 @@
#include "core/perf_stats.h"
#include "core/settings.h"
#include "core/telemetry_session.h"
#include "file_sys/cheat_engine.h"
#include "frontend/applets/profile_select.h"
#include "frontend/applets/software_keyboard.h"
#include "frontend/applets/web_browser.h"
@@ -205,6 +206,7 @@ struct System::Impl {
GDBStub::Shutdown();
Service::Shutdown();
service_manager.reset();
cheat_engine.reset();
telemetry_session.reset();
gpu_core.reset();
@@ -255,6 +257,8 @@ struct System::Impl {
CpuCoreManager cpu_core_manager;
bool is_powered_on = false;
std::unique_ptr<FileSys::CheatEngine> cheat_engine;
/// Frontend applets
std::unique_ptr<Core::Frontend::ProfileSelectApplet> profile_selector;
std::unique_ptr<Core::Frontend::SoftwareKeyboardApplet> software_keyboard;
@@ -453,6 +457,13 @@ Tegra::DebugContext* System::GetGPUDebugContext() const {
return impl->debug_context.get();
}
/// Builds a new CheatEngine from the given cheat lists, build id and code region bounds, and
/// stores it in the system, replacing whatever engine was held before.
void System::RegisterCheatList(const std::vector<FileSys::CheatList>& list,
const std::string& build_id, VAddr code_region_start,
VAddr code_region_end) {
impl->cheat_engine = std::make_unique<FileSys::CheatEngine>(*this, list, build_id,
code_region_start, code_region_end);
}
void System::SetFilesystem(std::shared_ptr<FileSys::VfsFilesystem> vfs) {
impl->virtual_filesystem = std::move(vfs);
}

View File

@@ -20,6 +20,7 @@ class WebBrowserApplet;
} // namespace Core::Frontend
namespace FileSys {
class CheatList;
class VfsFilesystem;
} // namespace FileSys
@@ -253,6 +254,9 @@ public:
std::shared_ptr<FileSys::VfsFilesystem> GetFilesystem() const;
void RegisterCheatList(const std::vector<FileSys::CheatList>& list, const std::string& build_id,
VAddr code_region_start, VAddr code_region_end);
void SetProfileSelector(std::unique_ptr<Frontend::ProfileSelectApplet> applet);
const Frontend::ProfileSelectApplet& GetProfileSelector() const;

View File

@@ -7,6 +7,7 @@
#include <cinttypes>
#include <limits>
#include "common/logging/log.h"
#include "common/uint128.h"
namespace Core::Timing {
@@ -60,4 +61,9 @@ s64 nsToCycles(u64 ns) {
return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000;
}
u64 CpuCyclesToClockCycles(u64 ticks) {
    // Computes ticks * CNTFREQ / BASE_CLOCK_RATE. The product is formed in 128 bits and then
    // divided by the clock rate narrowed to 32 bits; only the quotient is returned.
    const auto divisor = static_cast<u32>(BASE_CLOCK_RATE);
    const u128 scaled = Common::Multiply64Into128(ticks, CNTFREQ);
    const auto quotient_and_remainder = Common::Divide128On32(scaled, divisor);
    return quotient_and_remainder.first;
}
} // namespace Core::Timing

View File

@@ -11,6 +11,7 @@ namespace Core::Timing {
// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
// The exact value used is of course unverified.
constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked
constexpr u64 CNTFREQ = 19200000; // Value from fusee.
inline s64 msToCycles(int ms) {
// since ms is int there is no way to overflow
@@ -61,4 +62,6 @@ inline u64 cyclesToMs(s64 cycles) {
return cycles * 1000 / BASE_CLOCK_RATE;
}
u64 CpuCyclesToClockCycles(u64 ticks);
} // namespace Core::Timing

View File

@@ -0,0 +1,490 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <locale>
#include "common/hex_util.h"
#include "common/microprofile.h"
#include "common/swap.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/file_sys/cheat_engine.h"
#include "core/hle/kernel/process.h"
#include "core/hle/service/hid/controllers/npad.h"
#include "core/hle/service/hid/hid.h"
#include "core/hle/service/sm/sm.h"
namespace FileSys {
/// Delay between two cheat passes, in core-timing cycles: the base clock rate divided by 60.
constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 60);
/// Mask applied to the button state in input conditionals; keeps the low 26 bits.
constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF;
u64 Cheat::Address() const {
    // Only the low 40 bits of the byte-swapped first eight raw bytes form the address.
    constexpr u64 address_mask = 0xFFFFFFFFFF;

    u64 leading_bytes;
    std::memcpy(&leading_bytes, raw.data(), sizeof(leading_bytes));
    return Common::swap64(leading_bytes) & address_mask;
}
/// Reads the value at `offset` using this cheat's own `width` field as the byte width.
u64 Cheat::ValueWidth(u64 offset) const {
return Value(offset, width);
}
/// Reads eight raw bytes starting at `offset`, byte-swaps them, and keeps the low `width` bytes.
///
/// @param offset Byte offset into `raw`. Eight bytes are read, so it must be at most 8 for the
///               16-byte `raw` array.
/// @param width  Number of bytes to keep. Values of 8 or more return the full 64-bit value.
/// @return The masked value.
u64 Cheat::Value(u64 offset, u64 width) const {
    u64 out;
    std::memcpy(&out, raw.data() + offset, sizeof(u64));
    out = Common::swap64(out);
    // The width field of a cheat is four bits wide and can therefore hold up to 15. Building
    // the mask for 8 bytes or more would shift by at least 64 bits, which is undefined, so any
    // such width is treated as "no masking".
    if (width >= 8)
        return out;
    return out & ((1ull << (width * CHAR_BIT)) - 1);
}
u32 Cheat::KeypadValue() const {
u32 out;
std::memcpy(&out, raw.data(), sizeof(u32));
return Common::swap32(out) & 0x0FFFFFFF;
}
void CheatList::SetMemoryParameters(VAddr main_begin, VAddr heap_begin, VAddr main_end,
                                    VAddr heap_end, MemoryWriter writer, MemoryReader reader) {
    // Bounds of the two regions that cheat addresses are resolved against and checked with.
    main_region_begin = main_begin;
    main_region_end = main_end;
    heap_region_begin = heap_begin;
    heap_region_end = heap_end;

    // These parameters share their names with the members, so the members need qualifying.
    this->reader = reader;
    this->writer = writer;
}
MICROPROFILE_DEFINE(Cheat_Engine, "Add-Ons", "Cheat Engine", MP_RGB(70, 200, 70));
void CheatList::Execute() {
    MICROPROFILE_SCOPE(Cheat_Engine);

    // Every pass starts with all scratch registers zeroed.
    std::fill(scratch.begin(), scratch.end(), 0);

    // Master blocks run first, then the standard ones. `in_standard` and `current_block` tell
    // the opcode handlers which block is currently being executed.
    in_standard = false;
    for (std::size_t block_index = 0; block_index < master_list.size(); ++block_index) {
        const auto& entry = master_list[block_index];
        LOG_DEBUG(Common_Filesystem, "Executing block #{:08X} ({})", block_index, entry.first);
        current_block = block_index;
        ExecuteBlock(entry.second);
    }

    in_standard = true;
    for (std::size_t block_index = 0; block_index < standard_list.size(); ++block_index) {
        const auto& entry = standard_list[block_index];
        LOG_DEBUG(Common_Filesystem, "Executing block #{:08X} ({})", block_index, entry.first);
        current_block = block_index;
        ExecuteBlock(entry.second);
    }
}
/// Takes ownership of the two program segments and keeps a pointer to the system, which
/// EvaluateConditional uses to reach the service manager.
CheatList::CheatList(const Core::System& system_, ProgramSegment master, ProgramSegment standard)
: master_list{std::move(master)}, standard_list{std::move(standard)}, system{&system_} {}
bool CheatList::EvaluateConditional(const Cheat& cheat) const {
using ComparisonFunction = bool (*)(u64, u64);
constexpr std::array<ComparisonFunction, 6> comparison_functions{
[](u64 a, u64 b) { return a > b; }, [](u64 a, u64 b) { return a >= b; },
[](u64 a, u64 b) { return a < b; }, [](u64 a, u64 b) { return a <= b; },
[](u64 a, u64 b) { return a == b; }, [](u64 a, u64 b) { return a != b; },
};
if (cheat.type == CodeType::ConditionalInput) {
const auto applet_resource =
system->ServiceManager().GetService<Service::HID::Hid>("hid")->GetAppletResource();
if (applet_resource == nullptr) {
LOG_WARNING(
Common_Filesystem,
"Attempted to evaluate input conditional, but applet resource is not initialized!");
return false;
}
const auto press_state =
applet_resource
->GetController<Service::HID::Controller_NPad>(Service::HID::HidController::NPad)
.GetAndResetPressState();
return ((press_state & cheat.KeypadValue()) & KEYPAD_BITMASK) != 0;
}
ASSERT(cheat.type == CodeType::Conditional);
const auto offset =
cheat.memory_type == MemoryType::MainNSO ? main_region_begin : heap_region_begin;
ASSERT(static_cast<u8>(cheat.comparison_op.Value()) < 6);
auto* function = comparison_functions[static_cast<u8>(cheat.comparison_op.Value())];
const auto addr = cheat.Address() + offset;
return function(reader(cheat.width, SanitizeAddress(addr)), cheat.ValueWidth(8));
}
void CheatList::ProcessBlockPairs(const Block& block) {
block_pairs.clear();
u64 scope = 0;
std::map<u64, u64> pairs;
for (std::size_t i = 0; i < block.size(); ++i) {
const auto& cheat = block[i];
switch (cheat.type) {
case CodeType::Conditional:
case CodeType::ConditionalInput:
pairs.insert_or_assign(scope, i);
++scope;
break;
case CodeType::EndConditional: {
--scope;
const auto idx = pairs.at(scope);
block_pairs.insert_or_assign(idx, i);
break;
}
case CodeType::Loop: {
if (cheat.end_of_loop) {
--scope;
const auto idx = pairs.at(scope);
block_pairs.insert_or_assign(idx, i);
} else {
pairs.insert_or_assign(scope, i);
++scope;
}
break;
}
}
}
}
/// Writes the cheat's immediate value to memory.
///
/// The target address is the cheat's address plus the base of the selected region (main NSO or
/// heap) plus the current value of the cheat's register. It is passed through SanitizeAddress
/// before the write.
void CheatList::WriteImmediate(const Cheat& cheat) {
    const auto offset =
        cheat.memory_type == MemoryType::MainNSO ? main_region_begin : heap_region_begin;
    const auto& register_3 = scratch.at(cheat.register_3);
    const auto addr = cheat.Address() + offset + register_3;
    const auto value = cheat.ValueWidth(8);

    // Argument order follows the format string: the value first, then the address.
    LOG_DEBUG(Common_Filesystem, "writing value={:016X} to addr={:016X}", value, addr);
    writer(cheat.width, SanitizeAddress(addr), value);
}
void CheatList::BeginConditional(const Cheat& cheat) {
    if (!EvaluateConditional(cheat)) {
        // The condition failed: continue from the cheat that closes this block. One is
        // subtracted because the executing loop advances the index after this handler returns.
        const auto iter = block_pairs.find(current_index);
        ASSERT(iter != block_pairs.end());
        current_index = iter->second - 1;
    }
}
/// Handler for the EndConditional opcode. It only logs; `cheat` is not used.
void CheatList::EndConditional(const Cheat& cheat) {
LOG_DEBUG(Common_Filesystem, "Ending conditional block.");
}
/// Executes a loop block: the cheats between the loop-start opcode at `current_index` and its
/// matching loop-end opcode are run repeatedly, with the loop counter stored in the cheat's
/// register before each iteration.
///
/// On return `current_index` refers to the loop-end cheat, so the caller resumes after it.
///
/// NOTE(review): the counter runs from the encoded value down to and including zero, i.e. one
/// more iteration than the encoded value; confirm this matches the intended opcode semantics.
void CheatList::Loop(const Cheat& cheat) {
    // Only the loop-start form may be executed; the loop-end form is a marker that the
    // surrounding code steps over.
    ASSERT(!cheat.end_of_loop.Value());

    auto& register_3 = scratch.at(cheat.register_3);
    const auto iter = block_pairs.find(current_index);
    ASSERT(iter != block_pairs.end());
    ASSERT(iter->first < iter->second);

    const auto& block = (in_standard ? standard_list : master_list)[current_block].second;

    const s32 initial_value = static_cast<s32>(cheat.Value(4, sizeof(s32)));
    for (s32 i = initial_value; i >= 0; --i) {
        register_3 = static_cast<u64>(i);
        for (std::size_t c = iter->first + 1; c < iter->second; ++c) {
            current_index = c;
            ExecuteSingleCheat(block[c]);
            // A handler may move current_index (a failed conditional jumps to its closer, a
            // nested loop finishes on its own end marker). Follow it, as ExecuteBlock does,
            // otherwise the skipped cheats would still be executed.
            c = current_index;
        }
    }
    current_index = iter->second;
}
void CheatList::LoadImmediate(const Cheat& cheat) {
auto& register_3 = scratch.at(cheat.register_3);
LOG_DEBUG(Common_Filesystem, "setting register={:01X} equal to value={:016X}", cheat.register_3,
cheat.Value(4, 8));
register_3 = cheat.Value(4, 8);
}
void CheatList::LoadIndexed(const Cheat& cheat) {
    const auto offset =
        cheat.memory_type == MemoryType::MainNSO ? main_region_begin : heap_region_begin;
    auto& register_3 = scratch.at(cheat.register_3);

    // The base is either the register's current value or the start of the selected region.
    const auto base = cheat.load_from_register.Value() ? register_3 : offset;
    const auto addr = base + cheat.Address();

    LOG_DEBUG(Common_Filesystem, "writing indexed value to register={:01X}, addr={:016X}",
              cheat.register_3, addr);
    register_3 = reader(cheat.width, SanitizeAddress(addr));
}
void CheatList::StoreIndexed(const Cheat& cheat) {
    // The target address comes from the cheat's register, optionally offset by a second one.
    auto addr = scratch.at(cheat.register_3);
    if (cheat.add_additional_register.Value()) {
        addr += scratch.at(cheat.register_6);
    }

    LOG_DEBUG(Common_Filesystem, "writing value={:016X} to addr={:016X}",
              cheat.Value(4, cheat.width), addr);
    writer(cheat.width, SanitizeAddress(addr), cheat.ValueWidth(4));
}
void CheatList::RegisterArithmetic(const Cheat& cheat) {
using ArithmeticFunction = u64 (*)(u64, u64);
constexpr std::array<ArithmeticFunction, 5> arithmetic_functions{
[](u64 a, u64 b) { return a + b; }, [](u64 a, u64 b) { return a - b; },
[](u64 a, u64 b) { return a * b; }, [](u64 a, u64 b) { return a << b; },
[](u64 a, u64 b) { return a >> b; },
};
using ArithmeticOverflowCheck = bool (*)(u64, u64);
constexpr std::array<ArithmeticOverflowCheck, 5> arithmetic_overflow_checks{
[](u64 a, u64 b) { return a > (std::numeric_limits<u64>::max() - b); }, // a + b
[](u64 a, u64 b) { return a > (std::numeric_limits<u64>::max() + b); }, // a - b
[](u64 a, u64 b) { return a > (std::numeric_limits<u64>::max() / b); }, // a * b
[](u64 a, u64 b) { return b >= 64 || (a & ~((1ull << (64 - b)) - 1)) != 0; }, // a << b
[](u64 a, u64 b) { return b >= 64 || (a & ((1ull << b) - 1)) != 0; }, // a >> b
};
static_assert(sizeof(arithmetic_functions) == sizeof(arithmetic_overflow_checks),
"Missing or have extra arithmetic overflow checks compared to functions!");
auto& register_3 = scratch.at(cheat.register_3);
ASSERT(static_cast<u8>(cheat.arithmetic_op.Value()) < 5);
auto* function = arithmetic_functions[static_cast<u8>(cheat.arithmetic_op.Value())];
auto* overflow_function =
arithmetic_overflow_checks[static_cast<u8>(cheat.arithmetic_op.Value())];
LOG_DEBUG(Common_Filesystem, "performing arithmetic with register={:01X}, value={:016X}",
cheat.register_3, cheat.ValueWidth(4));
if (overflow_function(register_3, cheat.ValueWidth(4))) {
LOG_WARNING(Common_Filesystem,
"overflow will occur when performing arithmetic operation={:02X} with operands "
"a={:016X}, b={:016X}!",
static_cast<u8>(cheat.arithmetic_op.Value()), register_3, cheat.ValueWidth(4));
}
register_3 = function(register_3, cheat.ValueWidth(4));
}
void CheatList::BeginConditionalInput(const Cheat& cheat) {
    if (!EvaluateConditional(cheat)) {
        // The buttons did not match: continue from the cheat that closes this block. One is
        // subtracted because the executing loop advances the index after this handler returns.
        const auto iter = block_pairs.find(current_index);
        ASSERT(iter != block_pairs.end());
        current_index = iter->second - 1;
    }
}
VAddr CheatList::SanitizeAddress(VAddr in) const {
    // An address is accepted when it lies inside the main region or inside the heap region
    // (begin inclusive, end exclusive).
    const bool inside_main = in >= main_region_begin && in < main_region_end;
    const bool inside_heap = in >= heap_region_begin && in < heap_region_end;
    if (inside_main || inside_heap) {
        return in;
    }

    LOG_ERROR(Common_Filesystem,
              "Cheat attempting to access memory at invalid address={:016X}, if this persists, "
              "the cheat may be incorrect. However, this may be normal early in execution if "
              "the game has not properly set up yet.",
              in);
    return 0; ///< Invalid addresses will hard crash
}
/// Dispatches one cheat to the handler for its opcode.
///
/// The handler table is listed in CodeType order, so the opcode value doubles as the index.
void CheatList::ExecuteSingleCheat(const Cheat& cheat) {
    using CheatOperationFunction = void (CheatList::*)(const Cheat&);
    constexpr std::array<CheatOperationFunction, 9> cheat_operation_functions{
        &CheatList::WriteImmediate,        &CheatList::BeginConditional,
        &CheatList::EndConditional,        &CheatList::Loop,
        &CheatList::LoadImmediate,         &CheatList::LoadIndexed,
        &CheatList::StoreIndexed,          &CheatList::RegisterArithmetic,
        &CheatList::BeginConditionalInput,
    };

    const auto index = static_cast<u8>(cheat.type.Value());
    // Compare against the number of handlers. sizeof() would give the table's size in bytes,
    // which is far larger than the element count and lets every 4-bit opcode value through.
    ASSERT(index < cheat_operation_functions.size());
    const auto op = cheat_operation_functions[index];
    (this->*op)(cheat);
}
void CheatList::ExecuteBlock(const Block& block) {
    encountered_loops.clear();
    ProcessBlockPairs(block);

    // `current_index` is published before each cheat runs and read back afterwards, because
    // conditional and loop handlers move it in order to skip ahead.
    for (std::size_t position = 0; position < block.size(); position = current_index + 1) {
        current_index = position;
        ExecuteSingleCheat(block[position]);
    }
}
/// Out-of-line defaulted destructor.
CheatParser::~CheatParser() = default;
CheatList CheatParser::MakeCheatList(const Core::System& system, CheatList::ProgramSegment master,
                                     CheatList::ProgramSegment standard) const {
    // Both segments are handed over to the new list rather than copied.
    return CheatList{system, std::move(master), std::move(standard)};
}
/// Out-of-line defaulted destructor.
TextCheatParser::~TextCheatParser() = default;
/// Parses a textual cheat file into a CheatList.
///
/// The text is split into lines (a trailing '\r' is dropped from each). A line starting with
/// '{' opens a master section and a line starting with '[' opens a standard section; the text
/// between the opening bracket and the last matching closing bracket is the section name.
/// Every following line up to the next section header is parsed as one cheat, except lines
/// shorter than eight characters, which are ignored. Lines before the first header are ignored.
///
/// @param system System instance forwarded to the resulting CheatList.
/// @param data   Raw bytes of the cheat file.
/// @return The cheat list built from all master and standard sections.
CheatList TextCheatParser::Parse(const Core::System& system, const std::vector<u8>& data) const {
    std::stringstream ss;
    ss.write(reinterpret_cast<const char*>(data.data()), data.size());

    std::vector<std::string> lines;
    std::string stream_line;
    while (std::getline(ss, stream_line)) {
        // Remove a trailing \r
        if (!stream_line.empty() && stream_line.back() == '\r')
            stream_line.pop_back();
        lines.push_back(std::move(stream_line));
    }

    const auto is_section_header = [](const std::string& text) {
        return !text.empty() && (text[0] == '[' || text[0] == '{');
    };

    CheatList::ProgramSegment master_list;
    CheatList::ProgramSegment standard_list;

    std::size_t i = 0;
    while (i < lines.size()) {
        // Lines are only read from here on, so refer to them in place instead of copying.
        const std::string& header = lines[i++];
        if (!is_section_header(header))
            continue;

        const auto master = header[0] == '{';
        const auto begin = master ? header.find('{') : header.find('[');
        const auto end = master ? header.rfind('}') : header.rfind(']');
        ASSERT(begin != std::string::npos && end != std::string::npos);

        std::string patch_name{header.begin() + begin + 1, header.begin() + end};

        // Collect every cheat line up to, but not including, the next section header.
        CheatList::Block block{};
        for (; i < lines.size() && !is_section_header(lines[i]); ++i) {
            const std::string& line = lines[i];
            if (line.size() < 8)
                continue;

            Cheat out{};
            out.raw = ParseSingleLineCheat(line);
            block.push_back(out);
        }

        // The name and the block are not needed afterwards; move them into the segment.
        (master ? master_list : standard_list)
            .emplace_back(std::move(patch_name), std::move(block));
    }

    return MakeCheatList(system, std::move(master_list), std::move(standard_list));
}
std::array<u8, 16> TextCheatParser::ParseSingleLineCheat(const std::string& line) const {
    // A line holds up to four words of eight hex digits, separated by single spaces.
    constexpr std::size_t word_chars = 8;
    constexpr std::size_t word_stride = word_chars + 1;

    std::array<u8, 16> out{};

    // True when the line is long enough to contain word `slot` and, for every word after the
    // first, the character right before it is a space.
    const auto has_word = [&line](std::size_t slot) {
        if (line.size() < slot * word_stride + word_chars)
            return false;
        return slot == 0 || line[slot * word_stride - 1] == ' ';
    };

    // Converts word `slot` from hex and stores its four bytes in the matching output slot.
    const auto read_word = [&line, &out](std::size_t slot) {
        const auto word = Common::HexStringToArray<sizeof(u32)>(
            std::string_view{line.data() + slot * word_stride, word_chars});
        std::memcpy(out.data() + slot * sizeof(u32), word.data(), sizeof(u32));
    };

    // Moves the four bytes in output slot `slot` one slot further and zeroes the old slot.
    const auto shift_word = [&out](std::size_t slot) {
        std::memcpy(out.data() + (slot + 1) * sizeof(u32), out.data() + slot * sizeof(u32),
                    sizeof(u32));
        std::memset(out.data() + slot * sizeof(u32), 0, sizeof(u32));
    };

    // The opcode sits in the high nibble of the first byte.
    const auto code_type = [&out] { return static_cast<CodeType>((out[0] & 0xF0) >> 4); };

    if (!has_word(0))
        return out;
    read_word(0);

    if (!has_word(1))
        return out;
    read_word(1);

    if (!has_word(2)) {
        // Perform shifting in case value is truncated early.
        const auto type = code_type();
        if (type == CodeType::Loop || type == CodeType::LoadImmediate ||
            type == CodeType::StoreIndexed || type == CodeType::RegisterArithmetic) {
            shift_word(1);
        }
        return out;
    }
    read_word(2);

    if (!has_word(3)) {
        // Perform shifting in case value is truncated early.
        const auto type = code_type();
        if (type == CodeType::WriteImmediate || type == CodeType::Conditional) {
            shift_word(2);
        }
        return out;
    }
    read_word(3);

    return out;
}
u64 MemoryReadImpl(u32 width, VAddr addr) {
    // `width` is the access size in bytes; only 1, 2, 4 and 8 are handled.
    if (width == 1) {
        return Memory::Read8(addr);
    }
    if (width == 2) {
        return Memory::Read16(addr);
    }
    if (width == 4) {
        return Memory::Read32(addr);
    }
    if (width == 8) {
        return Memory::Read64(addr);
    }

    UNREACHABLE();
    return 0;
}
void MemoryWriteImpl(u32 width, VAddr addr, u64 value) {
    // `width` is the access size in bytes; the value is narrowed to that size before writing.
    if (width == 1) {
        Memory::Write8(addr, static_cast<u8>(value));
    } else if (width == 2) {
        Memory::Write16(addr, static_cast<u16>(value));
    } else if (width == 4) {
        Memory::Write32(addr, static_cast<u32>(value));
    } else if (width == 8) {
        Memory::Write64(addr, value);
    } else {
        UNREACHABLE();
    }
}
CheatEngine::CheatEngine(Core::System& system, std::vector<CheatList> cheats_,
                         const std::string& build_id, VAddr code_region_start,
                         VAddr code_region_end)
    : cheats{std::move(cheats_)}, core_timing{system.CoreTiming()} {
    // Register a timing event, named after the build id, that forwards to FrameCallback, and
    // schedule its first run.
    const auto on_frame = [this](u64 userdata, s64 cycles_late) {
        FrameCallback(userdata, cycles_late);
    };
    event = core_timing.RegisterEvent("CheatEngine::FrameCallback::" + build_id, on_frame);
    core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS, event);

    // Give every list the code and heap region bounds and the memory access functions.
    const auto& vm_manager = system.CurrentProcess()->VMManager();
    for (std::size_t i = 0; i < this->cheats.size(); ++i) {
        this->cheats[i].SetMemoryParameters(
            code_region_start, vm_manager.GetHeapRegionBaseAddress(), code_region_end,
            vm_manager.GetHeapRegionEndAddress(), &MemoryWriteImpl, &MemoryReadImpl);
    }
}
/// Unschedules the frame event, whose callback captures `this` (see the constructor).
CheatEngine::~CheatEngine() {
core_timing.UnscheduleEvent(event, 0);
}
void CheatEngine::FrameCallback(u64 userdata, s64 cycles_late) {
    // Run every registered cheat list once.
    for (std::size_t i = 0; i < cheats.size(); ++i) {
        cheats[i].Execute();
    }

    // Schedule the next run, shortened by however late this one was delivered.
    const s64 next_delay = CHEAT_ENGINE_TICKS - cycles_late;
    core_timing.ScheduleEvent(next_delay, event);
}
} // namespace FileSys

View File

@@ -0,0 +1,234 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <map>
#include <set>
#include <vector>
#include "common/bit_field.h"
#include "common/common_types.h"
namespace Core {
class System;
}
namespace Core::Timing {
class CoreTiming;
struct EventType;
} // namespace Core::Timing
namespace FileSys {
// Opcode of a single cheat instruction. Each entry is preceded by the textual layout of the
// instruction: the first hex digit is the opcode, the remaining letters name its operand fields.
// The numeric values are consecutive from 0.
enum class CodeType : u32 {
// 0TMR00AA AAAAAAAA YYYYYYYY YYYYYYYY
// Writes a T sized value Y to the address A added to the value of register R in memory domain M
WriteImmediate = 0,
// 1TMC00AA AAAAAAAA YYYYYYYY YYYYYYYY
// Compares the T sized value Y to the value at address A in memory domain M using the
// conditional function C. If success, continues execution. If failure, jumps to the matching
// EndConditional statement.
Conditional = 1,
// 20000000
// Terminates a Conditional or ConditionalInput block.
EndConditional = 2,
// 300R0000 VVVVVVVV
// Starts looping V times, storing the current count in register R.
// Loop block is terminated with a matching 310R0000.
Loop = 3,
// 400R0000 VVVVVVVV VVVVVVVV
// Sets the value of register R to the value V.
LoadImmediate = 4,
// 5TMRI0AA AAAAAAAA
// Sets the value of register R to the value of width T at address A in memory domain M, with
// the current value of R added to the address if I == 1.
LoadIndexed = 5,
// 6T0RIFG0 VVVVVVVV VVVVVVVV
// Writes the value V of width T to the memory address stored in register R. Adds the value of
// register G to the final calculation if F is nonzero. Increments the value of register R by T
// after operation if I is nonzero.
StoreIndexed = 6,
// 7T0RA000 VVVVVVVV
// Performs the arithmetic operation A on the value in register R and the value V of width T,
// storing the result in register R.
RegisterArithmetic = 7,
// 8KKKKKKK
// Checks to see if any of the buttons defined by the bitmask K are pressed. If any are,
// execution continues. If none are, execution skips to the next EndConditional command.
ConditionalInput = 8,
};
// Memory domain that a cheat's address is relative to (the "M" field in the CodeType layouts;
// stored in Cheat::memory_type).
enum class MemoryType : u32 {
// Addressed relative to start of main NSO
MainNSO = 0,
// Addressed relative to start of heap
Heap = 1,
};
// Operation selector for CodeType::RegisterArithmetic (the "A" field of that instruction;
// stored in Cheat::arithmetic_op).
enum class ArithmeticOp : u32 {
Add = 0,
Sub = 1,
Mult = 2,
LShift = 3,
RShift = 4,
};
// Conditional function for CodeType::Conditional (the "C" field of that instruction; stored in
// Cheat::comparison_op). Enumerators start at 1; no enumerator is assigned to the value 0.
enum class ComparisonOp : u32 {
GreaterThan = 1,
GreaterThanEqual = 2,
LessThan = 3,
LessThanEqual = 4,
Equal = 5,
Inequal = 6,
};
// One encoded cheat instruction: 16 raw bytes overlaid with named bit-field views.
// Several fields deliberately occupy the same bit range (e.g. width/end_of_loop, or
// load_from_register/increment_register/arithmetic_op); which view is meaningful presumably
// depends on the instruction's `type` -- see the layouts documented on CodeType.
// NOTE(review): BitField<P, N, T> is assumed to be an N-bit field of type T starting at bit P;
// confirm against common/bit_field.h.
union Cheat {
// Raw bytes of the instruction.
std::array<u8, 16> raw;
// Opcode of the instruction.
BitField<4, 4, CodeType> type;
BitField<0, 4, u32> width; // Can be 1, 2, 4, or 8. Measured in bytes.
BitField<0, 4, u32> end_of_loop;
BitField<12, 4, MemoryType> memory_type;
BitField<8, 4, u32> register_3;
BitField<8, 4, ComparisonOp> comparison_op;
BitField<20, 4, u32> load_from_register;
BitField<20, 4, u32> increment_register;
BitField<20, 4, ArithmeticOp> arithmetic_op;
BitField<16, 4, u32> add_additional_register;
BitField<28, 4, u32> register_6;
// Decoding helpers; their definitions are not part of this header.
// NOTE(review): presumably these extract the address, value width, value and keypad bitmask
// fields from `raw` -- confirm against the implementation.
u64 Address() const;
u64 ValueWidth(u64 offset) const;
u64 Value(u64 offset, u64 width) const;
u32 KeypadValue() const;
};
class CheatParser;
// Represents a full collection of cheats for a game. The Execute function should be called every
// interval that all cheats should be executed. Clients should not directly instantiate this class
// (hence private constructor), they should instead receive an instance from CheatParser, which
// guarantees the list is always in an acceptable state.
class CheatList {
public:
// CheatParser is the only type allowed to call the private constructor.
friend class CheatParser;
// A sequence of cheat instructions.
using Block = std::vector<Cheat>;
// A list of (string, Block) pairs.
// NOTE(review): the string is presumably the cheat's name -- confirm against the parser.
using ProgramSegment = std::vector<std::pair<std::string, Block>>;
// (width in bytes, address, value)
using MemoryWriter = void (*)(u32, VAddr, u64);
// (width in bytes, address) -> value
using MemoryReader = u64 (*)(u32, VAddr);
// Supplies the bounds of the main and heap regions and the callbacks used to access memory.
void SetMemoryParameters(VAddr main_begin, VAddr heap_begin, VAddr main_end, VAddr heap_end,
MemoryWriter writer, MemoryReader reader);
// Runs the cheats; see the class comment for when this should be called.
void Execute();
private:
CheatList(const Core::System& system_, ProgramSegment master, ProgramSegment standard);
void ProcessBlockPairs(const Block& block);
void ExecuteSingleCheat(const Cheat& cheat);
void ExecuteBlock(const Block& block);
bool EvaluateConditional(const Cheat& cheat) const;
// Individual cheat operations
void WriteImmediate(const Cheat& cheat);
void BeginConditional(const Cheat& cheat);
void EndConditional(const Cheat& cheat);
void Loop(const Cheat& cheat);
void LoadImmediate(const Cheat& cheat);
void LoadIndexed(const Cheat& cheat);
void StoreIndexed(const Cheat& cheat);
void RegisterArithmetic(const Cheat& cheat);
void BeginConditionalInput(const Cheat& cheat);
VAddr SanitizeAddress(VAddr in) const;
// Master Codes are defined as codes that cannot be disabled and are run prior to all
// others.
ProgramSegment master_list;
// All other codes
ProgramSegment standard_list;
bool in_standard = false;
// 16 (0x0-0xF) scratch registers that can be used by cheats
std::array<u64, 16> scratch{};
// Memory access callbacks; null until provided (see SetMemoryParameters).
MemoryWriter writer = nullptr;
MemoryReader reader = nullptr;
// Region bounds; zero until provided (see SetMemoryParameters).
u64 main_region_begin{};
u64 heap_region_begin{};
u64 main_region_end{};
u64 heap_region_end{};
u64 current_block{};
// The current index of the cheat within the current Block
u64 current_index{};
// The 'stack' of the program. When a conditional or loop statement is encountered, its index is
// pushed onto this stack. When an end block is encountered, the condition is checked.
// NOTE(review): the backing container is a std::map<u64, u64>, not a stack or queue; confirm
// what the keys and values hold (presumably start index -> matching end index).
std::map<u64, u64> block_pairs;
std::set<u64> encountered_loops;
// Non-owning pointer to the system this list was created for.
const Core::System* system;
};
// Intermediary class that parses a text file or other disk format for storing cheats into a
// CheatList object, that can be used for execution.
class CheatParser {
public:
virtual ~CheatParser();
// Parses the raw bytes of a cheat file (`data`) into a CheatList. Implemented by each
// concrete parser for its own on-disk format.
virtual CheatList Parse(const Core::System& system, const std::vector<u8>& data) const = 0;
protected:
// Builds a CheatList from already-parsed master and standard segments. Provided here because
// CheatList's constructor is private and only CheatParser is its friend, so derived parsers
// cannot construct a CheatList directly.
CheatList MakeCheatList(const Core::System& system_, CheatList::ProgramSegment master,
CheatList::ProgramSegment standard) const;
};
// CheatParser implementation that parses text files
class TextCheatParser final : public CheatParser {
public:
~TextCheatParser() override;
// Parses the bytes of a text cheat file into a CheatList.
CheatList Parse(const Core::System& system, const std::vector<u8>& data) const override;
private:
// Converts one line of text into the 16 raw bytes of a single cheat (see Cheat::raw).
std::array<u8, 16> ParseSingleLineCheat(const std::string& line) const;
};
// Class that encapsulates a CheatList and manages its interaction with memory and CoreTiming
class CheatEngine final {
public:
// Takes ownership of the given cheat lists. code_region_start/code_region_end are passed on to
// each list as the bounds of its main region.
// NOTE(review): the use of `build_id` is not visible from this header -- confirm in the
// implementation.
CheatEngine(Core::System& system_, std::vector<CheatList> cheats_, const std::string& build_id,
VAddr code_region_start, VAddr code_region_end);
// Unschedules `event` from core timing.
~CheatEngine();
private:
// Timing callback: executes every list in `cheats` and schedules `event` again.
void FrameCallback(u64 userdata, s64 cycles_late);
// The cheat lists executed on every callback.
std::vector<CheatList> cheats;
// Timing event that drives FrameCallback.
Core::Timing::EventType* event;
// Timing instance used to schedule and unschedule `event`.
Core::Timing::CoreTiming& core_timing;
};
} // namespace FileSys

View File

@@ -24,13 +24,26 @@ namespace FileSys {
union NCASectionHeader;
/// Describes the type of content within an NCA archive.
enum class NCAContentType : u8 {
/// Executable-related data
Program = 0,
/// Metadata.
Meta = 1,
/// Access control data.
Control = 2,
/// Information related to the game manual
/// e.g. Legal information, etc.
Manual = 3,
/// System data.
Data = 4,
Data_Unknown5 = 5, ///< Seems to be used on some system archives
/// Data that can be accessed by applications.
PublicData = 5,
};
enum class NCASectionCryptoType : u8 {

View File

@@ -7,6 +7,7 @@
#include <cstddef>
#include <cstring>
#include "common/file_util.h"
#include "common/hex_util.h"
#include "common/logging/log.h"
#include "core/file_sys/content_archive.h"
@@ -232,6 +233,57 @@ bool PatchManager::HasNSOPatch(const std::array<u8, 32>& build_id_) const {
return !CollectPatches(patch_dirs, build_id).empty();
}
// Looks for "<id>.txt" inside `base_path`, where <id> is the first 16 hex characters of the
// build ID (formatted with the requested letter case), and parses it as a text cheat file.
// Returns std::nullopt when the file does not exist or cannot be read completely.
static std::optional<CheatList> ReadCheatFileFromFolder(const Core::System& system, u64 title_id,
                                                        const std::array<u8, 0x20>& build_id_,
                                                        const VirtualDir& base_path, bool upper) {
    // Only the leading 8 bytes of the build ID (two hex digits per byte) name the file.
    constexpr std::size_t short_id_length = sizeof(u64) * 2;
    const auto full_hex_id = Common::HexArrayToString(build_id_, upper);
    const auto build_id = full_hex_id.substr(0, short_id_length);

    const auto file_name = fmt::format("{}.txt", build_id);
    const auto cheat_file = base_path->GetFile(file_name);
    if (!cheat_file) {
        LOG_INFO(Common_Filesystem, "No cheats file found for title_id={:016X}, build_id={}",
                 title_id, build_id);
        return std::nullopt;
    }

    // Read the whole file into memory; a short read is treated as a failure.
    std::vector<u8> contents(cheat_file->GetSize());
    const auto bytes_read = cheat_file->Read(contents.data(), contents.size());
    if (bytes_read != contents.size()) {
        LOG_INFO(Common_Filesystem, "Failed to read cheats file for title_id={:016X}, build_id={}",
                 title_id, build_id);
        return std::nullopt;
    }

    return TextCheatParser{}.Parse(system, contents);
}
// Collects the cheat lists provided by the mods installed for this title.
// Every subdirectory of the title's modification load root is visited in name order; if it has a
// "cheats" subdirectory containing a cheat file for `build_id_`, the parsed list is appended to
// the result. At most one list is produced per mod directory.
// Returns an empty vector when no load root is available or no cheat file matches.
std::vector<CheatList> PatchManager::CreateCheatList(const Core::System& system,
                                                     const std::array<u8, 32>& build_id_) const {
    const auto load_dir = Service::FileSystem::GetModificationLoadRoot(title_id);
    if (load_dir == nullptr) {
        // load_dir is dereferenced right below, so a null result must not get past this point.
        // NOTE(review): confirm under which conditions GetModificationLoadRoot yields null.
        LOG_WARNING(Common_Filesystem,
                    "No modification load root available for title_id={:016X}, skipping cheats",
                    title_id);
        return {};
    }

    auto patch_dirs = load_dir->GetSubdirectories();
    std::sort(patch_dirs.begin(), patch_dirs.end(),
              [](const VirtualDir& l, const VirtualDir& r) { return l->GetName() < r->GetName(); });

    std::vector<CheatList> out;
    out.reserve(patch_dirs.size());
    for (const auto& subdir : patch_dirs) {
        const auto cheats_dir = subdir->GetSubdirectory("cheats");
        if (cheats_dir == nullptr) {
            continue;
        }

        // The file name is derived from the build ID's hex string; try the upper=true spelling
        // first and fall back to upper=false only if that yields nothing.
        auto res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, true);
        if (!res.has_value()) {
            res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, false);
        }
        if (res.has_value()) {
            out.push_back(std::move(*res));
        }
    }
    return out;
}
static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType type) {
const auto load_dir = Service::FileSystem::GetModificationLoadRoot(title_id);
if ((type != ContentRecordType::Program && type != ContentRecordType::Data) ||
@@ -403,6 +455,8 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
}
if (IsDirValidAndNonEmpty(mod->GetSubdirectory("romfs")))
AppendCommaIfNotEmpty(types, "LayeredFS");
if (IsDirValidAndNonEmpty(mod->GetSubdirectory("cheats")))
AppendCommaIfNotEmpty(types, "Cheats");
if (types.empty())
continue;

View File

@@ -8,9 +8,14 @@
#include <memory>
#include <string>
#include "common/common_types.h"
#include "core/file_sys/cheat_engine.h"
#include "core/file_sys/nca_metadata.h"
#include "core/file_sys/vfs.h"
namespace Core {
class System;
}
namespace FileSys {
class NCA;
@@ -45,6 +50,10 @@ public:
// Used to prevent expensive copies in NSO loader.
bool HasNSOPatch(const std::array<u8, 0x20>& build_id) const;
// Creates one CheatList for each mod directory that provides a cheats file matching build_id
std::vector<CheatList> CreateCheatList(const Core::System& system,
const std::array<u8, 0x20>& build_id) const;
// Currently tracked RomFS patches:
// - Game Updates
// - LayeredFS

View File

@@ -94,7 +94,7 @@ static ContentRecordType GetCRTypeFromNCAType(NCAContentType type) {
case NCAContentType::Control:
return ContentRecordType::Control;
case NCAContentType::Data:
case NCAContentType::Data_Unknown5:
case NCAContentType::PublicData:
return ContentRecordType::Data;
case NCAContentType::Manual:
// TODO(DarkLordZach): Peek at NCA contents to differentiate Manual and Legal.

View File

@@ -39,10 +39,10 @@ struct CommandHeader {
union {
u32_le raw_low;
BitField<0, 16, CommandType> type;
BitField<16, 4, u32_le> num_buf_x_descriptors;
BitField<20, 4, u32_le> num_buf_a_descriptors;
BitField<24, 4, u32_le> num_buf_b_descriptors;
BitField<28, 4, u32_le> num_buf_w_descriptors;
BitField<16, 4, u32> num_buf_x_descriptors;
BitField<20, 4, u32> num_buf_a_descriptors;
BitField<24, 4, u32> num_buf_b_descriptors;
BitField<28, 4, u32> num_buf_w_descriptors;
};
enum class BufferDescriptorCFlag : u32 {
@@ -53,28 +53,28 @@ struct CommandHeader {
union {
u32_le raw_high;
BitField<0, 10, u32_le> data_size;
BitField<0, 10, u32> data_size;
BitField<10, 4, BufferDescriptorCFlag> buf_c_descriptor_flags;
BitField<31, 1, u32_le> enable_handle_descriptor;
BitField<31, 1, u32> enable_handle_descriptor;
};
};
static_assert(sizeof(CommandHeader) == 8, "CommandHeader size is incorrect");
union HandleDescriptorHeader {
u32_le raw_high;
BitField<0, 1, u32_le> send_current_pid;
BitField<1, 4, u32_le> num_handles_to_copy;
BitField<5, 4, u32_le> num_handles_to_move;
BitField<0, 1, u32> send_current_pid;
BitField<1, 4, u32> num_handles_to_copy;
BitField<5, 4, u32> num_handles_to_move;
};
static_assert(sizeof(HandleDescriptorHeader) == 4, "HandleDescriptorHeader size is incorrect");
struct BufferDescriptorX {
union {
BitField<0, 6, u32_le> counter_bits_0_5;
BitField<6, 3, u32_le> address_bits_36_38;
BitField<9, 3, u32_le> counter_bits_9_11;
BitField<12, 4, u32_le> address_bits_32_35;
BitField<16, 16, u32_le> size;
BitField<0, 6, u32> counter_bits_0_5;
BitField<6, 3, u32> address_bits_36_38;
BitField<9, 3, u32> counter_bits_9_11;
BitField<12, 4, u32> address_bits_32_35;
BitField<16, 16, u32> size;
};
u32_le address_bits_0_31;
@@ -103,10 +103,10 @@ struct BufferDescriptorABW {
u32_le address_bits_0_31;
union {
BitField<0, 2, u32_le> flags;
BitField<2, 3, u32_le> address_bits_36_38;
BitField<24, 4, u32_le> size_bits_32_35;
BitField<28, 4, u32_le> address_bits_32_35;
BitField<0, 2, u32> flags;
BitField<2, 3, u32> address_bits_36_38;
BitField<24, 4, u32> size_bits_32_35;
BitField<28, 4, u32> address_bits_32_35;
};
VAddr Address() const {
@@ -128,8 +128,8 @@ struct BufferDescriptorC {
u32_le address_bits_0_31;
union {
BitField<0, 16, u32_le> address_bits_32_47;
BitField<16, 16, u32_le> size;
BitField<0, 16, u32> address_bits_32_47;
BitField<16, 16, u32> size;
};
VAddr Address() const {
@@ -167,8 +167,8 @@ struct DomainMessageHeader {
struct {
union {
BitField<0, 8, CommandType> command;
BitField<8, 8, u32_le> input_object_count;
BitField<16, 16, u32_le> size;
BitField<8, 8, u32> input_object_count;
BitField<16, 16, u32> size;
};
u32_le object_id;
INSERT_PADDING_WORDS(2);

View File

@@ -274,6 +274,20 @@ inline void ResponseBuilder::Push(u64 value) {
Push(static_cast<u32>(value >> 32));
}
// Pushes a float by copying its object representation into a u32 and pushing that word.
template <>
inline void ResponseBuilder::Push(float value) {
    u32 raw_bits{};
    std::memcpy(&raw_bits, &value, sizeof(raw_bits));
    Push(raw_bits);
}
// Pushes a double by copying its object representation into a u64 and pushing that value.
template <>
inline void ResponseBuilder::Push(double value) {
    u64 raw_bits{};
    std::memcpy(&raw_bits, &value, sizeof(raw_bits));
    Push(raw_bits);
}
template <>
inline void ResponseBuilder::Push(bool value) {
Push(static_cast<u8>(value));
@@ -415,6 +429,22 @@ inline s64 RequestParser::Pop() {
return static_cast<s64>(Pop<u64>());
}
// Pops a u32 and returns the float whose object representation is that word.
template <>
inline float RequestParser::Pop() {
    const u32 raw_bits = Pop<u32>();
    float result;
    std::memcpy(&result, &raw_bits, sizeof(result));
    return result;
}
// Pops a u64 and returns the double whose object representation is that value.
template <>
inline double RequestParser::Pop() {
    const u64 value = Pop<u64>();
    // The destination must be a double: copying into a float would transfer only half of the
    // popped bytes and return an unrelated value widened to double.
    double real;
    static_assert(sizeof(real) == sizeof(value), "double must be 64 bits wide");
    std::memcpy(&real, &value, sizeof(real));
    return real;
}
template <>
inline bool RequestParser::Pop() {
return Pop<u8>() != 0;

View File

@@ -0,0 +1,12 @@
// Copyright 2019 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/hle/kernel/code_set.h"
namespace Kernel {
// The constructor and destructor are declared in the header and defaulted here, out of line.
CodeSet::CodeSet() = default;
CodeSet::~CodeSet() = default;
} // namespace Kernel

View File

@@ -0,0 +1,90 @@
// Copyright 2019 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <memory>
#include <vector>
#include "common/common_types.h"
namespace Kernel {
/**
* Represents executable data that may be loaded into a kernel process.
*
* A code set consists of three basic segments:
* - A code (AKA text) segment,
* - A read-only data segment (rodata)
* - A data segment
*
* The code segment is the portion of the object file that contains
* executable instructions.
*
* The read-only data segment is the portion of the object file that
* contains (as one would expect) read-only data, such as fixed constant
* values and data structures.
*
* The data segment is similar to the read-only data segment -- it contains
* variables and data structures that have predefined values, however,
* entities within this segment can be modified.
*/
struct CodeSet final {
/// A single segment within a code set.
struct Segment final {
/// The byte offset that this segment is located at.
std::size_t offset = 0;
/// The address to map this segment to.
VAddr addr = 0;
/// The size of this segment in bytes.
u32 size = 0;
};
/// Constructor and destructor are defined out of line.
explicit CodeSet();
~CodeSet();
/// A code set cannot be copied, only moved.
CodeSet(const CodeSet&) = delete;
CodeSet& operator=(const CodeSet&) = delete;
CodeSet(CodeSet&&) = default;
CodeSet& operator=(CodeSet&&) = default;
/// Returns the code (text) segment, stored at index 0 of `segments`.
Segment& CodeSegment() {
return segments[0];
}
/// Const overload of CodeSegment().
const Segment& CodeSegment() const {
return segments[0];
}
/// Returns the read-only data segment, stored at index 1 of `segments`.
Segment& RODataSegment() {
return segments[1];
}
/// Const overload of RODataSegment().
const Segment& RODataSegment() const {
return segments[1];
}
/// Returns the data segment, stored at index 2 of `segments`.
Segment& DataSegment() {
return segments[2];
}
/// Const overload of DataSegment().
const Segment& DataSegment() const {
return segments[2];
}
/// The overall data that backs this code set.
std::shared_ptr<std::vector<u8>> memory;
/// The segments that comprise this code set.
std::array<Segment, 3> segments;
/// The entry point address for this code set.
VAddr entrypoint = 0;
};
} // namespace Kernel

View File

@@ -9,6 +9,7 @@
#include "common/logging/log.h"
#include "core/core.h"
#include "core/file_sys/program_metadata.h"
#include "core/hle/kernel/code_set.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
@@ -31,7 +32,7 @@ namespace {
*/
void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) {
// Setup page table so we can write to memory
SetCurrentPageTable(&owner_process.VMManager().page_table);
Memory::SetCurrentPageTable(&owner_process.VMManager().page_table);
// Initialize new "main" thread
const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress();
@@ -50,9 +51,6 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_poi
}
} // Anonymous namespace
CodeSet::CodeSet() = default;
CodeSet::~CodeSet() = default;
SharedPtr<Process> Process::Create(Core::System& system, std::string&& name) {
auto& kernel = system.Kernel();
@@ -212,7 +210,7 @@ void Process::FreeTLSSlot(VAddr tls_address) {
}
void Process::LoadModule(CodeSet module_, VAddr base_addr) {
const auto MapSegment = [&](CodeSet::Segment& segment, VMAPermission permissions,
const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions,
MemoryState memory_state) {
const auto vma = vm_manager
.MapMemoryBlock(segment.addr + base_addr, module_.memory,
@@ -222,9 +220,9 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
};
// Map CodeSet segments
MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::CodeStatic);
MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeMutable);
MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeMutable);
MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::Code);
MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeData);
MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData);
// Clear instruction cache in CPU JIT
system.InvalidateCpuInstructionCaches();

View File

@@ -7,7 +7,6 @@
#include <array>
#include <bitset>
#include <cstddef>
#include <memory>
#include <string>
#include <vector>
#include <boost/container/static_vector.hpp>
@@ -34,6 +33,8 @@ class KernelCore;
class ResourceLimit;
class Thread;
struct CodeSet;
struct AddressMapping {
// Address and size must be page-aligned
VAddr address;
@@ -66,46 +67,6 @@ enum class ProcessStatus {
DebugBreak,
};
struct CodeSet final {
struct Segment {
std::size_t offset = 0;
VAddr addr = 0;
u32 size = 0;
};
explicit CodeSet();
~CodeSet();
Segment& CodeSegment() {
return segments[0];
}
const Segment& CodeSegment() const {
return segments[0];
}
Segment& RODataSegment() {
return segments[1];
}
const Segment& RODataSegment() const {
return segments[1];
}
Segment& DataSegment() {
return segments[2];
}
const Segment& DataSegment() const {
return segments[2];
}
std::shared_ptr<std::vector<u8>> memory;
std::array<Segment, 3> segments;
VAddr entrypoint = 0;
};
class Process final : public WaitObject {
public:
enum : u64 {

View File

@@ -96,7 +96,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
auto* const thread_owner_process = current_thread->GetOwnerProcess();
if (previous_process != thread_owner_process) {
system.Kernel().MakeCurrentProcess(thread_owner_process);
SetCurrentPageTable(&thread_owner_process->VMManager().page_table);
Memory::SetCurrentPageTable(&thread_owner_process->VMManager().page_table);
}
cpu_core.LoadContext(new_thread->GetContext());
@@ -199,8 +199,7 @@ void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
ASSERT(thread->GetPriority() < THREADPRIO_COUNT);
// Yield this thread -- sleep for zero time and force reschedule to different thread
WaitCurrentThread_Sleep();
GetCurrentThread()->WakeAfterDelay(0);
GetCurrentThread()->Sleep(0);
}
void Scheduler::YieldWithLoadBalancing(Thread* thread) {
@@ -215,8 +214,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
ASSERT(priority < THREADPRIO_COUNT);
// Sleep for zero time to be able to force reschedule to different thread
WaitCurrentThread_Sleep();
GetCurrentThread()->WakeAfterDelay(0);
GetCurrentThread()->Sleep(0);
Thread* suggested_thread = nullptr;

View File

@@ -1285,10 +1285,14 @@ static ResultCode StartThread(Handle thread_handle) {
/// Called when a thread exits
static void ExitThread() {
LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", Core::CurrentArmInterface().GetPC());
auto& system = Core::System::GetInstance();
ExitCurrentThread();
Core::System::GetInstance().PrepareReschedule();
LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
current_thread->Stop();
system.CurrentScheduler().RemoveThread(current_thread);
system.PrepareReschedule();
}
/// Sleep the current thread
@@ -1301,32 +1305,32 @@ static void SleepThread(s64 nanoseconds) {
YieldAndWaitForLoadBalancing = -2,
};
auto& system = Core::System::GetInstance();
auto& scheduler = system.CurrentScheduler();
auto* const current_thread = scheduler.GetCurrentThread();
if (nanoseconds <= 0) {
auto& scheduler{Core::System::GetInstance().CurrentScheduler()};
switch (static_cast<SleepType>(nanoseconds)) {
case SleepType::YieldWithoutLoadBalancing:
scheduler.YieldWithoutLoadBalancing(GetCurrentThread());
scheduler.YieldWithoutLoadBalancing(current_thread);
break;
case SleepType::YieldWithLoadBalancing:
scheduler.YieldWithLoadBalancing(GetCurrentThread());
scheduler.YieldWithLoadBalancing(current_thread);
break;
case SleepType::YieldAndWaitForLoadBalancing:
scheduler.YieldAndWaitForLoadBalancing(GetCurrentThread());
scheduler.YieldAndWaitForLoadBalancing(current_thread);
break;
default:
UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds);
}
} else {
// Sleep current thread and check for next thread to schedule
WaitCurrentThread_Sleep();
// Create an event to wake the thread up after the specified nanosecond delay has passed
GetCurrentThread()->WakeAfterDelay(nanoseconds);
current_thread->Sleep(nanoseconds);
}
// Reschedule all CPU cores
for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i)
Core::System::GetInstance().CpuCore(i).PrepareReschedule();
for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i) {
system.CpuCore(i).PrepareReschedule();
}
}
/// Wait process wide key atomic

View File

@@ -7,8 +7,6 @@
#include <optional>
#include <vector>
#include <boost/range/algorithm_ext/erase.hpp>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
@@ -68,17 +66,6 @@ void Thread::Stop() {
owner_process->FreeTLSSlot(tls_address);
}
void WaitCurrentThread_Sleep() {
Thread* thread = GetCurrentThread();
thread->SetStatus(ThreadStatus::WaitSleep);
}
void ExitCurrentThread() {
Thread* thread = GetCurrentThread();
thread->Stop();
Core::System::GetInstance().CurrentScheduler().RemoveThread(thread);
}
void Thread::WakeAfterDelay(s64 nanoseconds) {
// Don't schedule a wakeup if the thread wants to wait forever
if (nanoseconds == -1)
@@ -269,8 +256,8 @@ void Thread::AddMutexWaiter(SharedPtr<Thread> thread) {
if (thread->lock_owner == this) {
// If the thread is already waiting for this thread to release the mutex, ensure that the
// waiters list is consistent and return without doing anything.
auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
ASSERT(itr != wait_mutex_threads.end());
const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
ASSERT(iter != wait_mutex_threads.end());
return;
}
@@ -278,11 +265,16 @@ void Thread::AddMutexWaiter(SharedPtr<Thread> thread) {
ASSERT(thread->lock_owner == nullptr);
// Ensure that the thread is not already in the list of mutex waiters
auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
ASSERT(itr == wait_mutex_threads.end());
const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
ASSERT(iter == wait_mutex_threads.end());
// Keep the list in an ordered fashion
const auto insertion_point = std::find_if(
wait_mutex_threads.begin(), wait_mutex_threads.end(),
[&thread](const auto& entry) { return entry->GetPriority() > thread->GetPriority(); });
wait_mutex_threads.insert(insertion_point, thread);
thread->lock_owner = this;
wait_mutex_threads.emplace_back(std::move(thread));
UpdatePriority();
}
@@ -290,32 +282,44 @@ void Thread::RemoveMutexWaiter(SharedPtr<Thread> thread) {
ASSERT(thread->lock_owner == this);
// Ensure that the thread is in the list of mutex waiters
auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
ASSERT(itr != wait_mutex_threads.end());
const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
ASSERT(iter != wait_mutex_threads.end());
wait_mutex_threads.erase(iter);
boost::remove_erase(wait_mutex_threads, thread);
thread->lock_owner = nullptr;
UpdatePriority();
}
void Thread::UpdatePriority() {
// Find the highest priority among all the threads that are waiting for this thread's lock
// If any of the threads waiting on the mutex have a higher priority
// (taking into account priority inheritance), then this thread inherits
// that thread's priority.
u32 new_priority = nominal_priority;
for (const auto& thread : wait_mutex_threads) {
if (thread->nominal_priority < new_priority)
new_priority = thread->nominal_priority;
if (!wait_mutex_threads.empty()) {
if (wait_mutex_threads.front()->current_priority < new_priority) {
new_priority = wait_mutex_threads.front()->current_priority;
}
}
if (new_priority == current_priority)
if (new_priority == current_priority) {
return;
}
scheduler->SetThreadPriority(this, new_priority);
current_priority = new_priority;
if (!lock_owner) {
return;
}
// Ensure that the thread is within the correct location in the waiting list.
auto old_owner = lock_owner;
lock_owner->RemoveMutexWaiter(this);
old_owner->AddMutexWaiter(this);
// Recursively update the priority of the thread that depends on the priority of this one.
if (lock_owner)
lock_owner->UpdatePriority();
lock_owner->UpdatePriority();
}
void Thread::ChangeCore(u32 core, u64 mask) {
@@ -391,6 +395,14 @@ void Thread::SetActivity(ThreadActivity value) {
}
}
// Puts this thread to sleep: sets its status to ThreadStatus::WaitSleep and then asks
// WakeAfterDelay to arrange the wakeup for the given delay in nanoseconds.
// NOTE(review): WakeAfterDelay appears to skip scheduling a wakeup when nanoseconds == -1
// (wait forever) -- confirm against its full definition.
void Thread::Sleep(s64 nanoseconds) {
// Mark this thread (not necessarily the currently running one) as waiting on a sleep
SetStatus(ThreadStatus::WaitSleep);
// Create an event to wake the thread up after the specified nanosecond delay has passed
WakeAfterDelay(nanoseconds);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/**

View File

@@ -383,6 +383,9 @@ public:
void SetActivity(ThreadActivity value);
/// Sleeps this thread for the given amount of nanoseconds.
void Sleep(s64 nanoseconds);
private:
explicit Thread(KernelCore& kernel);
~Thread() override;
@@ -398,8 +401,14 @@ private:
VAddr entry_point = 0;
VAddr stack_top = 0;
u32 nominal_priority = 0; ///< Nominal thread priority, as set by the emulated application
u32 current_priority = 0; ///< Current thread priority, can be temporarily changed
/// Nominal thread priority, as set by the emulated application.
/// The nominal priority is the thread priority without priority
/// inheritance taken into account.
u32 nominal_priority = 0;
/// Current thread priority. This may change over the course of the
/// thread's lifetime in order to facilitate priority inheritance.
u32 current_priority = 0;
u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks.
u64 last_running_ticks = 0; ///< CPU tick when thread was last running
@@ -460,14 +469,4 @@ private:
*/
Thread* GetCurrentThread();
/**
* Waits the current thread on a sleep
*/
void WaitCurrentThread_Sleep();
/**
* Stops the current thread and removes it from the thread_list
*/
void ExitCurrentThread();
} // namespace Kernel

View File

@@ -7,29 +7,29 @@
#include <utility>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/memory_hook.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/file_sys/program_metadata.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/vm_manager.h"
#include "core/memory.h"
#include "core/memory_hook.h"
#include "core/memory_setup.h"
namespace Kernel {
namespace {
const char* GetMemoryStateName(MemoryState state) {
static constexpr const char* names[] = {
"Unmapped", "Io",
"Normal", "CodeStatic",
"CodeMutable", "Heap",
"Shared", "Unknown1",
"ModuleCodeStatic", "ModuleCodeMutable",
"IpcBuffer0", "Stack",
"ThreadLocal", "TransferMemoryIsolated",
"TransferMemory", "ProcessMemory",
"Inaccessible", "IpcBuffer1",
"IpcBuffer3", "KernelStack",
"Unmapped", "Io",
"Normal", "Code",
"CodeData", "Heap",
"Shared", "Unknown1",
"ModuleCode", "ModuleCodeData",
"IpcBuffer0", "Stack",
"ThreadLocal", "TransferMemoryIsolated",
"TransferMemory", "ProcessMemory",
"Inaccessible", "IpcBuffer1",
"IpcBuffer3", "KernelStack",
};
return names[ToSvcMemoryState(state)];
@@ -177,7 +177,7 @@ ResultVal<VAddr> VMManager::FindFreeRegion(u64 size) const {
ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u64 size,
MemoryState state,
Memory::MemoryHookPointer mmio_handler) {
Common::MemoryHookPointer mmio_handler) {
// This is the appropriately sized VMA that will turn into our allocation.
CASCADE_RESULT(VMAIter vma_handle, CarveVMA(target, size));
VirtualMemoryArea& final_vma = vma_handle->second;
@@ -624,7 +624,7 @@ void VMManager::ClearPageTable() {
std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
page_table.special_regions.clear();
std::fill(page_table.attributes.begin(), page_table.attributes.end(),
Memory::PageType::Unmapped);
Common::PageType::Unmapped);
}
VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, MemoryState state_mask,

View File

@@ -9,9 +9,10 @@
#include <tuple>
#include <vector>
#include "common/common_types.h"
#include "common/memory_hook.h"
#include "common/page_table.h"
#include "core/hle/result.h"
#include "core/memory.h"
#include "core/memory_hook.h"
namespace FileSys {
enum class ProgramAddressSpaceType : u8;
@@ -164,12 +165,12 @@ enum class MemoryState : u32 {
Unmapped = 0x00,
Io = 0x01 | FlagMapped,
Normal = 0x02 | FlagMapped | FlagQueryPhysicalAddressAllowed,
CodeStatic = 0x03 | CodeFlags | FlagMapProcess,
CodeMutable = 0x04 | CodeFlags | FlagMapProcess | FlagCodeMemory,
Code = 0x03 | CodeFlags | FlagMapProcess,
CodeData = 0x04 | DataFlags | FlagMapProcess | FlagCodeMemory,
Heap = 0x05 | DataFlags | FlagCodeMemory,
Shared = 0x06 | FlagMapped | FlagMemoryPoolAllocated,
ModuleCodeStatic = 0x08 | CodeFlags | FlagModule | FlagMapProcess,
ModuleCodeMutable = 0x09 | DataFlags | FlagModule | FlagMapProcess | FlagCodeMemory,
ModuleCode = 0x08 | CodeFlags | FlagModule | FlagMapProcess,
ModuleCodeData = 0x09 | DataFlags | FlagModule | FlagMapProcess | FlagCodeMemory,
IpcBuffer0 = 0x0A | FlagMapped | FlagQueryPhysicalAddressAllowed | FlagMemoryPoolAllocated |
IPCFlags | FlagSharedDevice | FlagSharedDeviceAligned,
@@ -290,7 +291,7 @@ struct VirtualMemoryArea {
// Settings for type = MMIO
/// Physical address of the register area this VMA maps to.
PAddr paddr = 0;
Memory::MemoryHookPointer mmio_handler = nullptr;
Common::MemoryHookPointer mmio_handler = nullptr;
/// Tests if this area can be merged to the right with `next`.
bool CanBeMergedWith(const VirtualMemoryArea& next) const;
@@ -368,7 +369,7 @@ public:
* @param mmio_handler The handler that will implement read and write for this MMIO region.
*/
ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u64 size, MemoryState state,
Memory::MemoryHookPointer mmio_handler);
Common::MemoryHookPointer mmio_handler);
/// Unmaps a range of addresses, splitting VMAs as necessary.
ResultCode UnmapRange(VAddr target, u64 size);
@@ -509,7 +510,7 @@ public:
/// Each VMManager has its own page table, which is set as the main one when the owning process
/// is scheduled.
Memory::PageTable page_table;
Common::PageTable page_table{Memory::PAGE_BITS};
private:
using VMAIter = VMAMap::iterator;
@@ -616,6 +617,9 @@ private:
VAddr new_map_region_base = 0;
VAddr new_map_region_end = 0;
VAddr main_code_region_base = 0;
VAddr main_code_region_end = 0;
VAddr tls_io_region_base = 0;
VAddr tls_io_region_end = 0;

View File

@@ -2,10 +2,10 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <cinttypes>
#include <cstring>
#include <stack>
#include "audio_core/audio_renderer.h"
#include "core/core.h"
#include "core/file_sys/savedata_factory.h"
@@ -93,38 +93,84 @@ void IWindowController::AcquireForegroundRights(Kernel::HLERequestContext& ctx)
}
IAudioController::IAudioController() : ServiceFramework("IAudioController") {
// clang-format off
static const FunctionInfo functions[] = {
{0, &IAudioController::SetExpectedMasterVolume, "SetExpectedMasterVolume"},
{1, &IAudioController::GetMainAppletExpectedMasterVolume,
"GetMainAppletExpectedMasterVolume"},
{2, &IAudioController::GetLibraryAppletExpectedMasterVolume,
"GetLibraryAppletExpectedMasterVolume"},
{3, nullptr, "ChangeMainAppletMasterVolume"},
{4, nullptr, "SetTransparentVolumeRate"},
{1, &IAudioController::GetMainAppletExpectedMasterVolume, "GetMainAppletExpectedMasterVolume"},
{2, &IAudioController::GetLibraryAppletExpectedMasterVolume, "GetLibraryAppletExpectedMasterVolume"},
{3, &IAudioController::ChangeMainAppletMasterVolume, "ChangeMainAppletMasterVolume"},
{4, &IAudioController::SetTransparentAudioRate, "SetTransparentVolumeRate"},
};
// clang-format on
RegisterHandlers(functions);
}
IAudioController::~IAudioController() = default;
void IAudioController::SetExpectedMasterVolume(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_AM, "(STUBBED) called");
IPC::RequestParser rp{ctx};
const float main_applet_volume_tmp = rp.Pop<float>();
const float library_applet_volume_tmp = rp.Pop<float>();
LOG_DEBUG(Service_AM, "called. main_applet_volume={}, library_applet_volume={}",
main_applet_volume_tmp, library_applet_volume_tmp);
// Ensure the volume values remain within the 0-100% range
main_applet_volume = std::clamp(main_applet_volume_tmp, min_allowed_volume, max_allowed_volume);
library_applet_volume =
std::clamp(library_applet_volume_tmp, min_allowed_volume, max_allowed_volume);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void IAudioController::GetMainAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_AM, "(STUBBED) called");
LOG_DEBUG(Service_AM, "called. main_applet_volume={}", main_applet_volume);
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push(volume);
rb.Push(main_applet_volume);
}
void IAudioController::GetLibraryAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_AM, "(STUBBED) called");
LOG_DEBUG(Service_AM, "called. library_applet_volume={}", library_applet_volume);
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push(volume);
rb.Push(library_applet_volume);
}
/// Handles the ChangeMainAppletMasterVolume request: stores the requested main applet
/// volume (clamped to the allowed range) together with the requested fade duration,
/// then replies with RESULT_SUCCESS.
void IAudioController::ChangeMainAppletMasterVolume(Kernel::HLERequestContext& ctx) {
    // Raw request payload as popped from the command buffer; the static_assert pins
    // its size at 16 bytes.
    struct Parameters {
        float volume;
        s64 fade_time_ns;
    };
    static_assert(sizeof(Parameters) == 16);

    IPC::RequestParser request{ctx};
    const Parameters args = request.PopRaw<Parameters>();
    const float requested_volume = args.volume;
    const s64 requested_fade_ns = args.fade_time_ns;

    LOG_DEBUG(Service_AM, "called. volume={}, fade_time_ns={}", requested_volume,
              requested_fade_ns);

    // Only the volume is range-limited; the fade time is stored as received.
    main_applet_volume = std::clamp(requested_volume, min_allowed_volume, max_allowed_volume);
    fade_time_ns = std::chrono::nanoseconds{requested_fade_ns};

    IPC::ResponseBuilder response{ctx, 2};
    response.Push(RESULT_SUCCESS);
}
/// Handles the SetTransparentVolumeRate request: reads one float from the request,
/// stores it clamped to the allowed volume range, and replies with RESULT_SUCCESS.
void IAudioController::SetTransparentAudioRate(Kernel::HLERequestContext& ctx) {
    IPC::RequestParser request{ctx};
    const float requested_rate = request.Pop<float>();

    LOG_DEBUG(Service_AM, "called. transparent_volume_rate={}", requested_rate);

    // Keep the stored rate inside [min_allowed_volume, max_allowed_volume].
    const float clamped_rate = std::clamp(requested_rate, min_allowed_volume, max_allowed_volume);
    transparent_volume_rate = clamped_rate;

    IPC::ResponseBuilder response{ctx, 2};
    response.Push(RESULT_SUCCESS);
}
IDisplayController::IDisplayController() : ServiceFramework("IDisplayController") {
@@ -169,7 +215,21 @@ IDisplayController::IDisplayController() : ServiceFramework("IDisplayController"
IDisplayController::~IDisplayController() = default;
IDebugFunctions::IDebugFunctions() : ServiceFramework("IDebugFunctions") {}
// Constructs the IDebugFunctions service and registers its command table.
// Every entry below has a nullptr handler, i.e. the command IDs and names are
// listed but none of them is implemented here.
IDebugFunctions::IDebugFunctions() : ServiceFramework{"IDebugFunctions"} {
// clang-format off
// Each entry is {command id, handler, command name}.
static const FunctionInfo functions[] = {
{0, nullptr, "NotifyMessageToHomeMenuForDebug"},
{1, nullptr, "OpenMainApplication"},
{10, nullptr, "EmulateButtonEvent"},
{20, nullptr, "InvalidateTransitionLayer"},
{30, nullptr, "RequestLaunchApplicationWithUserAndArgumentForDebug"},
{40, nullptr, "GetAppletResourceUsageInfo"},
};
// clang-format on
RegisterHandlers(functions);
}
IDebugFunctions::~IDebugFunctions() = default;
ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger)

View File

@@ -4,6 +4,7 @@
#pragma once
#include <chrono>
#include <memory>
#include <queue>
#include "core/hle/kernel/writable_event.h"
@@ -81,8 +82,21 @@ private:
void SetExpectedMasterVolume(Kernel::HLERequestContext& ctx);
void GetMainAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx);
void GetLibraryAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx);
void ChangeMainAppletMasterVolume(Kernel::HLERequestContext& ctx);
void SetTransparentAudioRate(Kernel::HLERequestContext& ctx);
u32 volume{100};
static constexpr float min_allowed_volume = 0.0f;
static constexpr float max_allowed_volume = 1.0f;
float main_applet_volume{0.25f};
float library_applet_volume{max_allowed_volume};
float transparent_volume_rate{min_allowed_volume};
// Volume transition fade time in nanoseconds.
// e.g. If the main applet volume was 0% and was changed to 50%
// with a fade of 50ns, then over the course of 50ns,
// the volume will gradually fade up to 50%
std::chrono::nanoseconds fade_time_ns{0};
};
class IDisplayController final : public ServiceFramework<IDisplayController> {

View File

@@ -8,6 +8,7 @@
#include <vector>
#include <opus.h>
#include <opus_multistream.h>
#include "common/assert.h"
#include "common/logging/log.h"
@@ -18,12 +19,12 @@
namespace Service::Audio {
namespace {
struct OpusDeleter {
void operator()(void* ptr) const {
operator delete(ptr);
void operator()(OpusMSDecoder* ptr) const {
opus_multistream_decoder_destroy(ptr);
}
};
using OpusDecoderPtr = std::unique_ptr<OpusDecoder, OpusDeleter>;
using OpusDecoderPtr = std::unique_ptr<OpusMSDecoder, OpusDeleter>;
struct OpusPacketHeader {
// Packet size in bytes.
@@ -33,7 +34,7 @@ struct OpusPacketHeader {
};
static_assert(sizeof(OpusPacketHeader) == 0x8, "OpusHeader is an invalid size");
class OpusDecoderStateBase {
class OpusDecoderState {
public:
/// Describes extra behavior that may be asked of the decoding context.
enum class ExtraBehavior {
@@ -49,22 +50,13 @@ public:
Enabled,
};
virtual ~OpusDecoderStateBase() = default;
// Decodes interleaved Opus packets. Optionally allows reporting time taken to
// perform the decoding, as well as any relevant extra behavior.
virtual void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
ExtraBehavior extra_behavior) = 0;
};
// Represents the decoder state for a non-multistream decoder.
class OpusDecoderState final : public OpusDecoderStateBase {
public:
explicit OpusDecoderState(OpusDecoderPtr decoder, u32 sample_rate, u32 channel_count)
: decoder{std::move(decoder)}, sample_rate{sample_rate}, channel_count{channel_count} {}
// Decodes interleaved Opus packets. Optionally allows reporting time taken to
// perform the decoding, as well as any relevant extra behavior.
void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
ExtraBehavior extra_behavior) override {
ExtraBehavior extra_behavior) {
if (perf_time == PerfTime::Disabled) {
DecodeInterleavedHelper(ctx, nullptr, extra_behavior);
} else {
@@ -135,7 +127,7 @@ private:
const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count));
const auto out_sample_count =
opus_decode(decoder.get(), frame, hdr.size, output.data(), frame_size, 0);
opus_multistream_decode(decoder.get(), frame, hdr.size, output.data(), frame_size, 0);
if (out_sample_count < 0) {
LOG_ERROR(Audio,
"Incorrect sample count received from opus_decode, "
@@ -158,7 +150,7 @@ private:
void ResetDecoderContext() {
ASSERT(decoder != nullptr);
opus_decoder_ctl(decoder.get(), OPUS_RESET_STATE);
opus_multistream_decoder_ctl(decoder.get(), OPUS_RESET_STATE);
}
OpusDecoderPtr decoder;
@@ -168,7 +160,7 @@ private:
class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
public:
explicit IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoderStateBase> decoder_state)
explicit IHardwareOpusDecoderManager(OpusDecoderState decoder_state)
: ServiceFramework("IHardwareOpusDecoderManager"), decoder_state{std::move(decoder_state)} {
// clang-format off
static const FunctionInfo functions[] = {
@@ -190,35 +182,51 @@ private:
void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Audio, "called");
decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Disabled,
OpusDecoderStateBase::ExtraBehavior::None);
decoder_state.DecodeInterleaved(ctx, OpusDecoderState::PerfTime::Disabled,
OpusDecoderState::ExtraBehavior::None);
}
void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Audio, "called");
decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Enabled,
OpusDecoderStateBase::ExtraBehavior::None);
decoder_state.DecodeInterleaved(ctx, OpusDecoderState::PerfTime::Enabled,
OpusDecoderState::ExtraBehavior::None);
}
void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Audio, "called");
IPC::RequestParser rp{ctx};
const auto extra_behavior = rp.Pop<bool>()
? OpusDecoderStateBase::ExtraBehavior::ResetContext
: OpusDecoderStateBase::ExtraBehavior::None;
const auto extra_behavior = rp.Pop<bool>() ? OpusDecoderState::ExtraBehavior::ResetContext
: OpusDecoderState::ExtraBehavior::None;
decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Enabled,
extra_behavior);
decoder_state.DecodeInterleaved(ctx, OpusDecoderState::PerfTime::Enabled, extra_behavior);
}
std::unique_ptr<OpusDecoderStateBase> decoder_state;
OpusDecoderState decoder_state;
};
std::size_t WorkerBufferSize(u32 channel_count) {
ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
return opus_decoder_get_size(static_cast<int>(channel_count));
constexpr int num_streams = 1;
const int num_stereo_streams = channel_count == 2 ? 1 : 0;
return opus_multistream_decoder_get_size(num_streams, num_stereo_streams);
}
// Builds the two-entry channel mapping table handed to the Opus multistream decoder.
//
// Stereo (channel_count == 2): input channels 0 and 1 feed the left and right
// outputs respectively, giving {0, 1}.
//
// Any other channel count (mono in practice): only input channel 0 is mapped.
// The second slot holds 255, the special value Opus defines to tell the decoder
// to ignore that channel, giving {0, 255}.
std::array<u8, 2> CreateMappingTable(u32 channel_count) {
    // Start from the mono layout and enable the right channel only for stereo.
    std::array<u8, 2> mapping{{0, 255}};
    if (channel_count == 2) {
        mapping[1] = 1;
    }
    return mapping;
}
} // Anonymous namespace
@@ -259,9 +267,15 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
const std::size_t worker_sz = WorkerBufferSize(channel_count);
ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large");
OpusDecoderPtr decoder{static_cast<OpusDecoder*>(operator new(worker_sz))};
if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) {
LOG_ERROR(Audio, "Failed to init opus decoder with error={}", err);
const int num_stereo_streams = channel_count == 2 ? 1 : 0;
const auto mapping_table = CreateMappingTable(channel_count);
int error = 0;
OpusDecoderPtr decoder{
opus_multistream_decoder_create(sample_rate, static_cast<int>(channel_count), 1,
num_stereo_streams, mapping_table.data(), &error)};
if (error != OPUS_OK || decoder == nullptr) {
LOG_ERROR(Audio, "Failed to create Opus decoder (error={}).", error);
IPC::ResponseBuilder rb{ctx, 2};
// TODO(ogniK): Use correct error code
rb.Push(ResultCode(-1));
@@ -271,7 +285,7 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
rb.PushIpcInterface<IHardwareOpusDecoderManager>(
std::make_unique<OpusDecoderState>(std::move(decoder), sample_rate, channel_count));
OpusDecoderState{std::move(decoder), sample_rate, channel_count});
}
HwOpus::HwOpus() : ServiceFramework("hwopus") {

View File

@@ -733,7 +733,10 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
FSP_SRV::~FSP_SRV() = default;
void FSP_SRV::SetCurrentProcess(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_FS, "(STUBBED) called");
IPC::RequestParser rp{ctx};
current_process_id = rp.Pop<u64>();
LOG_DEBUG(Service_FS, "called. current_process_id=0x{:016X}", current_process_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);

View File

@@ -32,6 +32,7 @@ private:
void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
FileSys::VirtualFile romfs;
u64 current_process_id = 0;
};
} // namespace Service::FileSystem

View File

@@ -41,20 +41,20 @@ private:
struct PadState {
union {
u32_le raw{};
BitField<0, 1, u32_le> a;
BitField<1, 1, u32_le> b;
BitField<2, 1, u32_le> x;
BitField<3, 1, u32_le> y;
BitField<4, 1, u32_le> l;
BitField<5, 1, u32_le> r;
BitField<6, 1, u32_le> zl;
BitField<7, 1, u32_le> zr;
BitField<8, 1, u32_le> plus;
BitField<9, 1, u32_le> minus;
BitField<10, 1, u32_le> d_left;
BitField<11, 1, u32_le> d_up;
BitField<12, 1, u32_le> d_right;
BitField<13, 1, u32_le> d_down;
BitField<0, 1, u32> a;
BitField<1, 1, u32> b;
BitField<2, 1, u32> x;
BitField<3, 1, u32> y;
BitField<4, 1, u32> l;
BitField<5, 1, u32> r;
BitField<6, 1, u32> zl;
BitField<7, 1, u32> zr;
BitField<8, 1, u32> plus;
BitField<9, 1, u32> minus;
BitField<10, 1, u32> d_left;
BitField<11, 1, u32> d_up;
BitField<12, 1, u32> d_right;
BitField<13, 1, u32> d_down;
};
};
static_assert(sizeof(PadState) == 0x4, "PadState is an invalid size");
@@ -62,7 +62,7 @@ private:
struct Attributes {
union {
u32_le raw{};
BitField<0, 1, u32_le> connected;
BitField<0, 1, u32> connected;
};
};
static_assert(sizeof(Attributes) == 0x4, "Attributes is an invalid size");

View File

@@ -39,13 +39,13 @@ public:
union {
u32_le raw{};
BitField<0, 1, u32_le> pro_controller;
BitField<1, 1, u32_le> handheld;
BitField<2, 1, u32_le> joycon_dual;
BitField<3, 1, u32_le> joycon_left;
BitField<4, 1, u32_le> joycon_right;
BitField<0, 1, u32> pro_controller;
BitField<1, 1, u32> handheld;
BitField<2, 1, u32> joycon_dual;
BitField<3, 1, u32> joycon_left;
BitField<4, 1, u32> joycon_right;
BitField<6, 1, u32_le> pokeball; // TODO(ogniK): Confirm when possible
BitField<6, 1, u32> pokeball; // TODO(ogniK): Confirm when possible
};
};
static_assert(sizeof(NPadType) == 4, "NPadType is an invalid size");
@@ -150,43 +150,43 @@ private:
union {
u64_le raw{};
// Button states
BitField<0, 1, u64_le> a;
BitField<1, 1, u64_le> b;
BitField<2, 1, u64_le> x;
BitField<3, 1, u64_le> y;
BitField<4, 1, u64_le> l_stick;
BitField<5, 1, u64_le> r_stick;
BitField<6, 1, u64_le> l;
BitField<7, 1, u64_le> r;
BitField<8, 1, u64_le> zl;
BitField<9, 1, u64_le> zr;
BitField<10, 1, u64_le> plus;
BitField<11, 1, u64_le> minus;
BitField<0, 1, u64> a;
BitField<1, 1, u64> b;
BitField<2, 1, u64> x;
BitField<3, 1, u64> y;
BitField<4, 1, u64> l_stick;
BitField<5, 1, u64> r_stick;
BitField<6, 1, u64> l;
BitField<7, 1, u64> r;
BitField<8, 1, u64> zl;
BitField<9, 1, u64> zr;
BitField<10, 1, u64> plus;
BitField<11, 1, u64> minus;
// D-Pad
BitField<12, 1, u64_le> d_left;
BitField<13, 1, u64_le> d_up;
BitField<14, 1, u64_le> d_right;
BitField<15, 1, u64_le> d_down;
BitField<12, 1, u64> d_left;
BitField<13, 1, u64> d_up;
BitField<14, 1, u64> d_right;
BitField<15, 1, u64> d_down;
// Left JoyStick
BitField<16, 1, u64_le> l_stick_left;
BitField<17, 1, u64_le> l_stick_up;
BitField<18, 1, u64_le> l_stick_right;
BitField<19, 1, u64_le> l_stick_down;
BitField<16, 1, u64> l_stick_left;
BitField<17, 1, u64> l_stick_up;
BitField<18, 1, u64> l_stick_right;
BitField<19, 1, u64> l_stick_down;
// Right JoyStick
BitField<20, 1, u64_le> r_stick_left;
BitField<21, 1, u64_le> r_stick_up;
BitField<22, 1, u64_le> r_stick_right;
BitField<23, 1, u64_le> r_stick_down;
BitField<20, 1, u64> r_stick_left;
BitField<21, 1, u64> r_stick_up;
BitField<22, 1, u64> r_stick_right;
BitField<23, 1, u64> r_stick_down;
// Not always active?
BitField<24, 1, u64_le> left_sl;
BitField<25, 1, u64_le> left_sr;
BitField<24, 1, u64> left_sl;
BitField<25, 1, u64> left_sr;
BitField<26, 1, u64_le> right_sl;
BitField<27, 1, u64_le> right_sr;
BitField<26, 1, u64> right_sl;
BitField<27, 1, u64> right_sr;
};
};
static_assert(sizeof(ControllerPadState) == 8, "ControllerPadState is an invalid size");
@@ -200,12 +200,12 @@ private:
struct ConnectionState {
union {
u32_le raw{};
BitField<0, 1, u32_le> IsConnected;
BitField<1, 1, u32_le> IsWired;
BitField<2, 1, u32_le> IsLeftJoyConnected;
BitField<3, 1, u32_le> IsLeftJoyWired;
BitField<4, 1, u32_le> IsRightJoyConnected;
BitField<5, 1, u32_le> IsRightJoyWired;
BitField<0, 1, u32> IsConnected;
BitField<1, 1, u32> IsWired;
BitField<2, 1, u32> IsLeftJoyConnected;
BitField<3, 1, u32> IsLeftJoyWired;
BitField<4, 1, u32> IsRightJoyConnected;
BitField<5, 1, u32> IsRightJoyWired;
};
};
static_assert(sizeof(ConnectionState) == 4, "ConnectionState is an invalid size");
@@ -240,23 +240,23 @@ private:
struct NPadProperties {
union {
s64_le raw{};
BitField<11, 1, s64_le> is_vertical;
BitField<12, 1, s64_le> is_horizontal;
BitField<13, 1, s64_le> use_plus;
BitField<14, 1, s64_le> use_minus;
BitField<11, 1, s64> is_vertical;
BitField<12, 1, s64> is_horizontal;
BitField<13, 1, s64> use_plus;
BitField<14, 1, s64> use_minus;
};
};
struct NPadDevice {
union {
u32_le raw{};
BitField<0, 1, s32_le> pro_controller;
BitField<1, 1, s32_le> handheld;
BitField<2, 1, s32_le> handheld_left;
BitField<3, 1, s32_le> handheld_right;
BitField<4, 1, s32_le> joycon_left;
BitField<5, 1, s32_le> joycon_right;
BitField<6, 1, s32_le> pokeball;
BitField<0, 1, s32> pro_controller;
BitField<1, 1, s32> handheld;
BitField<2, 1, s32> handheld_left;
BitField<3, 1, s32> handheld_right;
BitField<4, 1, s32> joycon_left;
BitField<5, 1, s32> joycon_right;
BitField<6, 1, s32> pokeball;
};
};

View File

@@ -33,8 +33,8 @@ private:
struct Attributes {
union {
u32 raw{};
BitField<0, 1, u32_le> start_touch;
BitField<1, 1, u32_le> end_touch;
BitField<0, 1, u32> start_touch;
BitField<1, 1, u32> end_touch;
};
};
static_assert(sizeof(Attributes) == 0x4, "Attributes is an invalid size");

View File

@@ -4,6 +4,9 @@
#pragma once
#include "core/hle/service/hid/controllers/controller_base.h"
#include "core/hle/service/service.h"
#include "controllers/controller_base.h"
#include "core/hle/service/service.h"

View File

@@ -319,15 +319,14 @@ public:
}
ASSERT(vm_manager
.MirrorMemory(*map_address, nro_addr, nro_size,
Kernel::MemoryState::ModuleCodeStatic)
.MirrorMemory(*map_address, nro_addr, nro_size, Kernel::MemoryState::ModuleCode)
.IsSuccess());
ASSERT(vm_manager.UnmapRange(nro_addr, nro_size).IsSuccess());
if (bss_size > 0) {
ASSERT(vm_manager
.MirrorMemory(*map_address + nro_size, bss_addr, bss_size,
Kernel::MemoryState::ModuleCodeStatic)
Kernel::MemoryState::ModuleCode)
.IsSuccess());
ASSERT(vm_manager.UnmapRange(bss_addr, bss_size).IsSuccess());
}
@@ -388,8 +387,7 @@ public:
const auto& nro_size = iter->second.size;
ASSERT(vm_manager
.MirrorMemory(heap_addr, mapped_addr, nro_size,
Kernel::MemoryState::ModuleCodeStatic)
.MirrorMemory(heap_addr, mapped_addr, nro_size, Kernel::MemoryState::ModuleCode)
.IsSuccess());
ASSERT(vm_manager.UnmapRange(mapped_addr, nro_size).IsSuccess());

View File

@@ -42,7 +42,7 @@ private:
union {
BitField<0, 16, Flags> flags;
BitField<16, 8, Severity> severity;
BitField<24, 8, u32_le> verbosity;
BitField<24, 8, u32> verbosity;
};
u32_le payload_size;

View File

@@ -19,11 +19,11 @@ public:
virtual ~nvdevice() = default;
union Ioctl {
u32_le raw;
BitField<0, 8, u32_le> cmd;
BitField<8, 8, u32_le> group;
BitField<16, 14, u32_le> length;
BitField<30, 1, u32_le> is_in;
BitField<31, 1, u32_le> is_out;
BitField<0, 8, u32> cmd;
BitField<8, 8, u32> group;
BitField<16, 14, u32> length;
BitField<30, 1, u32> is_in;
BitField<31, 1, u32> is_out;
};
/**

View File

@@ -10,6 +10,7 @@
#include "core/core.h"
#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
#include "core/hle/service/nvdrv/devices/nvmap.h"
#include "core/memory.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
@@ -88,7 +89,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
for (const auto& entry : entries) {
LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}",
entry.offset, entry.nvmap_handle, entry.pages);
Tegra::GPUVAddr offset = static_cast<Tegra::GPUVAddr>(entry.offset) << 0x10;
GPUVAddr offset = static_cast<GPUVAddr>(entry.offset) << 0x10;
auto object = nvmap_dev->GetObject(entry.nvmap_handle);
if (!object) {
LOG_CRITICAL(Service_NVDRV, "nvmap {} is an invalid handle!", entry.nvmap_handle);
@@ -101,7 +102,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
u64 size = static_cast<u64>(entry.pages) << 0x10;
ASSERT(size <= object->size);
Tegra::GPUVAddr returned = gpu.MemoryManager().MapBufferEx(object->addr, offset, size);
GPUVAddr returned = gpu.MemoryManager().MapBufferEx(object->addr, offset, size);
ASSERT(returned == offset);
}
std::memcpy(output.data(), entries.data(), output.size());
@@ -172,16 +173,8 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
return 0;
}
auto& system_instance = Core::System::GetInstance();
// Remove this memory region from the rasterizer cache.
auto& gpu = system_instance.GPU();
auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
ASSERT(cpu_addr);
gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
params.offset = Core::System::GetInstance().GPU().MemoryManager().UnmapBuffer(params.offset,
itr->second.size);
buffer_mappings.erase(itr->second.offset);
std::memcpy(output.data(), &params, output.size());

View File

@@ -9,6 +9,7 @@
#include "common/common_types.h"
#include "common/file_util.h"
#include "common/logging/log.h"
#include "core/hle/kernel/code_set.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/vm_manager.h"
#include "core/loader/elf.h"

View File

@@ -1,147 +0,0 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <vector>
#include "common/common_funcs.h"
#include "common/logging/log.h"
#include "common/swap.h"
#include "core/loader/linker.h"
#include "core/memory.h"
namespace Loader {
// Relocation kinds that Linker::WriteRelocations knows how to apply; any other
// value is reported as an unknown relocation type.
// NOTE(review): the numeric values look like the AArch64 ELF relocation codes
// (R_AARCH64_ABS64 and friends) - confirm against the ABI document.
enum class RelocationType : u32 { ABS64 = 257, GLOB_DAT = 1025, JUMP_SLOT = 1026, RELATIVE = 1027 };
// Dynamic-section tags that Linker::Relocate looks up. DT_NULL ends the scan of
// the dynamic section; the remaining tags locate the string table, the symbol
// table and the two relocation tables (with their sizes).
enum DynamicType : u32 {
DT_NULL = 0,
DT_PLTRELSZ = 2,
DT_STRTAB = 5,
DT_SYMTAB = 6,
DT_RELA = 7,
DT_RELASZ = 8,
DT_STRSZ = 10,
DT_JMPREL = 23,
};
// One relocation entry as read from the program image (0x18 bytes, enforced below).
struct Elf64_Rela {
// Byte offset into the program image of the 64-bit slot to patch.
u64_le offset;
// Which relocation to perform.
RelocationType type;
// Index into the symbol list built by Linker::Relocate.
u32_le symbol;
// Constant added to the load base (RELATIVE) or symbol value (ABS64).
s64_le addend;
};
static_assert(sizeof(Elf64_Rela) == 0x18, "Elf64_Rela has incorrect size.");
// One dynamic-section entry: a tag and its associated value (0x10 bytes).
struct Elf64_Dyn {
u64_le tag;
u64_le value;
};
static_assert(sizeof(Elf64_Dyn) == 0x10, "Elf64_Dyn has incorrect size.");
// One symbol-table entry as read from the program image (0x18 bytes).
struct Elf64_Sym {
// Offset of the symbol's name inside the string table (DT_STRTAB).
u32_le name;
// NOTE(review): presumably covers the ELF st_info/st_other bytes, which this
// file never reads - confirm.
INSERT_PADDING_BYTES(0x2);
// Not read by the code in this file.
u16_le shndx;
// Non-zero for a defined symbol; Linker::Relocate exports it at load_base + value.
u64_le value;
// Not read by the code in this file.
u64_le size;
};
static_assert(sizeof(Elf64_Sym) == 0x18, "Elf64_Sym has incorrect size.");
void Linker::WriteRelocations(std::vector<u8>& program_image, const std::vector<Symbol>& symbols,
u64 relocation_offset, u64 size, VAddr load_base) {
for (u64 i = 0; i < size; i += sizeof(Elf64_Rela)) {
Elf64_Rela rela;
std::memcpy(&rela, &program_image[relocation_offset + i], sizeof(Elf64_Rela));
const Symbol& symbol = symbols[rela.symbol];
switch (rela.type) {
case RelocationType::RELATIVE: {
const u64 value = load_base + rela.addend;
if (!symbol.name.empty()) {
exports[symbol.name] = value;
}
std::memcpy(&program_image[rela.offset], &value, sizeof(u64));
break;
}
case RelocationType::JUMP_SLOT:
case RelocationType::GLOB_DAT:
if (!symbol.value) {
imports[symbol.name] = {rela.offset + load_base, 0};
} else {
exports[symbol.name] = symbol.value;
std::memcpy(&program_image[rela.offset], &symbol.value, sizeof(u64));
}
break;
case RelocationType::ABS64:
if (!symbol.value) {
imports[symbol.name] = {rela.offset + load_base, rela.addend};
} else {
const u64 value = symbol.value + rela.addend;
exports[symbol.name] = value;
std::memcpy(&program_image[rela.offset], &value, sizeof(u64));
}
break;
default:
LOG_CRITICAL(Loader, "Unknown relocation type: {}", static_cast<int>(rela.type));
break;
}
}
}
void Linker::Relocate(std::vector<u8>& program_image, u32 dynamic_section_offset, VAddr load_base) {
std::map<u64, u64> dynamic;
while (dynamic_section_offset < program_image.size()) {
Elf64_Dyn dyn;
std::memcpy(&dyn, &program_image[dynamic_section_offset], sizeof(Elf64_Dyn));
dynamic_section_offset += sizeof(Elf64_Dyn);
if (dyn.tag == DT_NULL) {
break;
}
dynamic[dyn.tag] = dyn.value;
}
u64 offset = dynamic[DT_SYMTAB];
std::vector<Symbol> symbols;
while (offset < program_image.size()) {
Elf64_Sym sym;
std::memcpy(&sym, &program_image[offset], sizeof(Elf64_Sym));
offset += sizeof(Elf64_Sym);
if (sym.name >= dynamic[DT_STRSZ]) {
break;
}
std::string name = reinterpret_cast<char*>(&program_image[dynamic[DT_STRTAB] + sym.name]);
if (sym.value) {
exports[name] = load_base + sym.value;
symbols.emplace_back(std::move(name), load_base + sym.value);
} else {
symbols.emplace_back(std::move(name), 0);
}
}
if (dynamic.find(DT_RELA) != dynamic.end()) {
WriteRelocations(program_image, symbols, dynamic[DT_RELA], dynamic[DT_RELASZ], load_base);
}
if (dynamic.find(DT_JMPREL) != dynamic.end()) {
WriteRelocations(program_image, symbols, dynamic[DT_JMPREL], dynamic[DT_PLTRELSZ],
load_base);
}
}
void Linker::ResolveImports() {
// Resolve imports
for (const auto& import : imports) {
const auto& search = exports.find(import.first);
if (search != exports.end()) {
Memory::Write64(import.second.ea, search->second + import.second.addend);
} else {
LOG_ERROR(Loader, "Unresolved import: {}", import.first);
}
}
}
} // namespace Loader

View File

@@ -1,36 +0,0 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <map>
#include <string>
#include "common/common_types.h"
namespace Loader {
// Helper that relocates a loaded program image and tracks the symbols it
// imports and exports. All members are protected, so it is only usable from
// derived classes.
// NOTE(review): std::vector is used in the signatures below, but <vector> is not
// among this header's visible includes - confirm it arrives via common_types.h.
class Linker {
protected:
// A symbol-table entry after parsing: its name and its resolved address
// (0 when the symbol is undefined in this image).
struct Symbol {
Symbol(std::string&& name, u64 value) : name(std::move(name)), value(value) {}
std::string name;
u64 value;
};
// A relocation that could not be resolved yet.
struct Import {
// Address that ResolveImports() writes the resolved value to.
VAddr ea;
// Constant added to the export's address before it is written.
s64 addend;
};
// Applies the relocation table located at relocation_offset (size bytes long)
// inside program_image, filling in `imports` and `exports` along the way.
void WriteRelocations(std::vector<u8>& program_image, const std::vector<Symbol>& symbols,
u64 relocation_offset, u64 size, VAddr load_base);
// Parses the dynamic section at dynamic_section_offset, builds the symbol list
// and applies the image's relocation tables via WriteRelocations().
void Relocate(std::vector<u8>& program_image, u32 dynamic_section_offset, VAddr load_base);
// Writes the address of each matching export to every pending import.
void ResolveImports();
// Unresolved imports, keyed by symbol name.
std::map<std::string, Import> imports;
// Known exported symbols: name -> address.
std::map<std::string, VAddr> exports;
};
} // namespace Loader

View File

@@ -14,6 +14,7 @@
#include "core/file_sys/romfs_factory.h"
#include "core/file_sys/vfs_offset.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/code_set.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/vm_manager.h"
#include "core/hle/service/filesystem/filesystem.h"

View File

@@ -4,10 +4,10 @@
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "common/common_types.h"
#include "core/loader/linker.h"
#include "core/loader/loader.h"
namespace FileSys {
@@ -21,7 +21,7 @@ class Process;
namespace Loader {
/// Loads an NRO file
class AppLoader_NRO final : public AppLoader, Linker {
class AppLoader_NRO final : public AppLoader {
public:
explicit AppLoader_NRO(FileSys::VirtualFile file);
~AppLoader_NRO() override;

View File

@@ -7,10 +7,13 @@
#include <lz4.h>
#include "common/common_funcs.h"
#include "common/file_util.h"
#include "common/hex_util.h"
#include "common/logging/log.h"
#include "common/swap.h"
#include "core/core.h"
#include "core/file_sys/patch_manager.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/code_set.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/vm_manager.h"
#include "core/loader/nso.h"
@@ -164,6 +167,16 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
std::memcpy(program_image.data(), pi_header.data() + 0x100, program_image.size());
}
// Apply cheats if they exist and the program has a valid title ID
if (pm) {
auto& system = Core::System::GetInstance();
const auto cheats = pm->CreateCheatList(system, nso_header.build_id);
if (!cheats.empty()) {
system.RegisterCheatList(cheats, Common::HexArrayToString(nso_header.build_id),
load_base, load_base + program_image.size());
}
}
// Load codeset for current process
codeset.memory = std::make_shared<std::vector<u8>>(std::move(program_image));
process.LoadModule(std::move(codeset), load_base);

View File

@@ -6,8 +6,8 @@
#include <optional>
#include "common/common_types.h"
#include "common/swap.h"
#include "core/file_sys/patch_manager.h"
#include "core/loader/linker.h"
#include "core/loader/loader.h"
namespace Kernel {
@@ -26,7 +26,7 @@ struct NSOArgumentHeader {
static_assert(sizeof(NSOArgumentHeader) == 0x20, "NSOArgumentHeader has incorrect size.");
/// Loads an NSO file
class AppLoader_NSO final : public AppLoader, Linker {
class AppLoader_NSO final : public AppLoader {
public:
explicit AppLoader_NSO(FileSys::VirtualFile file);

View File

@@ -10,6 +10,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "common/page_table.h"
#include "common/swap.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
@@ -18,13 +19,14 @@
#include "core/hle/lock.h"
#include "core/memory.h"
#include "core/memory_setup.h"
#include "video_core/gpu.h"
#include "video_core/renderer_base.h"
namespace Memory {
static PageTable* current_page_table = nullptr;
static Common::PageTable* current_page_table = nullptr;
void SetCurrentPageTable(PageTable* page_table) {
void SetCurrentPageTable(Common::PageTable* page_table) {
current_page_table = page_table;
auto& system = Core::System::GetInstance();
@@ -36,39 +38,20 @@ void SetCurrentPageTable(PageTable* page_table) {
}
}
PageTable* GetCurrentPageTable() {
Common::PageTable* GetCurrentPageTable() {
return current_page_table;
}
PageTable::PageTable() = default;
PageTable::PageTable(std::size_t address_space_width_in_bits) {
Resize(address_space_width_in_bits);
}
PageTable::~PageTable() = default;
void PageTable::Resize(std::size_t address_space_width_in_bits) {
const std::size_t num_page_table_entries = 1ULL << (address_space_width_in_bits - PAGE_BITS);
pointers.resize(num_page_table_entries);
attributes.resize(num_page_table_entries);
// The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
// vector size is subsequently decreased (via resize), the vector might not automatically
// actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for
// 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use.
pointers.shrink_to_fit();
attributes.shrink_to_fit();
}
static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, PageType type) {
static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory,
Common::PageType type) {
LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE,
(base + size) * PAGE_SIZE);
RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE,
FlushMode::FlushAndInvalidate);
// During boot, current_page_table might not be set yet, in which case we need not flush
if (Core::System::GetInstance().IsPoweredOn()) {
Core::System::GetInstance().GPU().FlushAndInvalidateRegion(base << PAGE_BITS,
size * PAGE_SIZE);
}
VAddr end = base + size;
ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
@@ -88,41 +71,47 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa
}
}
void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target) {
void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, PageType::Memory);
MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory);
}
void MapIoRegion(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer mmio_handler) {
void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
Common::MemoryHookPointer mmio_handler) {
ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Special);
MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, Common::PageType::Special);
auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
SpecialRegion region{SpecialRegion::Type::IODevice, std::move(mmio_handler)};
page_table.special_regions.add(std::make_pair(interval, std::set<SpecialRegion>{region}));
Common::SpecialRegion region{Common::SpecialRegion::Type::IODevice, std::move(mmio_handler)};
page_table.special_regions.add(
std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
}
void UnmapRegion(PageTable& page_table, VAddr base, u64 size) {
void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) {
ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Unmapped);
MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, Common::PageType::Unmapped);
auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
page_table.special_regions.erase(interval);
}
void AddDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook) {
void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
Common::MemoryHookPointer hook) {
auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
SpecialRegion region{SpecialRegion::Type::DebugHook, std::move(hook)};
page_table.special_regions.add(std::make_pair(interval, std::set<SpecialRegion>{region}));
Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)};
page_table.special_regions.add(
std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
}
void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook) {
void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
Common::MemoryHookPointer hook) {
auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
SpecialRegion region{SpecialRegion::Type::DebugHook, std::move(hook)};
page_table.special_regions.subtract(std::make_pair(interval, std::set<SpecialRegion>{region}));
Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)};
page_table.special_regions.subtract(
std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
}
/**
@@ -171,19 +160,19 @@ T Read(const VAddr vaddr) {
return value;
}
PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
switch (type) {
case PageType::Unmapped:
case Common::PageType::Unmapped:
LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr);
return 0;
case PageType::Memory:
case Common::PageType::Memory:
ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
break;
case PageType::RasterizerCachedMemory: {
RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Flush);
case Common::PageType::RasterizerCachedMemory: {
auto host_ptr{GetPointerFromVMA(vaddr)};
Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T));
T value;
std::memcpy(&value, GetPointerFromVMA(vaddr), sizeof(T));
std::memcpy(&value, host_ptr, sizeof(T));
return value;
}
default:
@@ -201,18 +190,19 @@ void Write(const VAddr vaddr, const T data) {
return;
}
PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
switch (type) {
case PageType::Unmapped:
case Common::PageType::Unmapped:
LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
static_cast<u32>(data), vaddr);
return;
case PageType::Memory:
case Common::PageType::Memory:
ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
break;
case PageType::RasterizerCachedMemory: {
RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate);
std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T));
case Common::PageType::RasterizerCachedMemory: {
auto host_ptr{GetPointerFromVMA(vaddr)};
Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T));
std::memcpy(host_ptr, &data, sizeof(T));
break;
}
default:
@@ -227,10 +217,10 @@ bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) {
if (page_pointer)
return true;
if (page_table.attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory)
if (page_table.attributes[vaddr >> PAGE_BITS] == Common::PageType::RasterizerCachedMemory)
return true;
if (page_table.attributes[vaddr >> PAGE_BITS] != PageType::Special)
if (page_table.attributes[vaddr >> PAGE_BITS] != Common::PageType::Special)
return false;
return false;
@@ -250,7 +240,8 @@ u8* GetPointer(const VAddr vaddr) {
return page_pointer + (vaddr & PAGE_MASK);
}
if (current_page_table->attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) {
if (current_page_table->attributes[vaddr >> PAGE_BITS] ==
Common::PageType::RasterizerCachedMemory) {
return GetPointerFromVMA(vaddr);
}
@@ -284,20 +275,20 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1;
for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) {
PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
Common::PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
if (cached) {
// Switch page type to cached if now cached
switch (page_type) {
case PageType::Unmapped:
case Common::PageType::Unmapped:
// It is not necessary for a process to have this region mapped into its address
// space, for example, a system module need not have a VRAM mapping.
break;
case PageType::Memory:
page_type = PageType::RasterizerCachedMemory;
case Common::PageType::Memory:
page_type = Common::PageType::RasterizerCachedMemory;
current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr;
break;
case PageType::RasterizerCachedMemory:
case Common::PageType::RasterizerCachedMemory:
// There can be more than one GPU region mapped per CPU region, so it's common that
// this area is already marked as cached.
break;
@@ -307,23 +298,23 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
} else {
// Switch page type to uncached if now uncached
switch (page_type) {
case PageType::Unmapped:
case Common::PageType::Unmapped:
// It is not necessary for a process to have this region mapped into its address
// space, for example, a system module need not have a VRAM mapping.
break;
case PageType::Memory:
case Common::PageType::Memory:
// There can be more than one GPU region mapped per CPU region, so it's common that
// this area is already unmarked as cached.
break;
case PageType::RasterizerCachedMemory: {
case Common::PageType::RasterizerCachedMemory: {
u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK);
if (pointer == nullptr) {
// It's possible that this function has been called while updating the pagetable
// after unmapping a VMA. In that case the underlying VMA will no longer exist,
// and we should just leave the pagetable entry blank.
page_type = PageType::Unmapped;
page_type = Common::PageType::Unmapped;
} else {
page_type = PageType::Memory;
page_type = Common::PageType::Memory;
current_page_table->pointers[vaddr >> PAGE_BITS] = pointer;
}
break;
@@ -335,47 +326,6 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
}
}
/**
* Forwards a flush and/or invalidate request for the virtual range [start, start + size) to the
* GPU, restricted to the parts of that range that overlap the current process's code region and
* heap region. Does nothing while the system is not powered on.
*
* @param start First virtual address of the range.
* @param size  Length of the range in bytes.
* @param mode  Selects which GPU call is issued: FlushRegion, InvalidateRegion, or
*              FlushAndInvalidateRegion.
*/
void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
auto& system_instance = Core::System::GetInstance();
// Since pages are unmapped on shutdown after video core is shut down, the renderer may be
// null here
if (!system_instance.IsPoweredOn()) {
return;
}
// Exclusive end of the requested range.
const VAddr end = start + size;
// Clips the requested range against [region_start, region_end) and issues the GPU call for the
// intersection only. Both ranges are treated as half-open by the comparisons below.
const auto CheckRegion = [&](VAddr region_start, VAddr region_end) {
if (start >= region_end || end <= region_start) {
// No overlap with region
return;
}
const VAddr overlap_start = std::max(start, region_start);
const VAddr overlap_end = std::min(end, region_end);
const VAddr overlap_size = overlap_end - overlap_start;
auto& gpu = system_instance.GPU();
switch (mode) {
case FlushMode::Flush:
gpu.FlushRegion(overlap_start, overlap_size);
break;
case FlushMode::Invalidate:
gpu.InvalidateRegion(overlap_start, overlap_size);
break;
case FlushMode::FlushAndInvalidate:
gpu.FlushAndInvalidateRegion(overlap_start, overlap_size);
break;
}
};
// Only the code and heap regions of the current process are considered; a range that
// overlaps neither results in no GPU call at all.
const auto& vm_manager = Core::CurrentProcess()->VMManager();
CheckRegion(vm_manager.GetCodeRegionBaseAddress(), vm_manager.GetCodeRegionEndAddress());
CheckRegion(vm_manager.GetHeapRegionBaseAddress(), vm_manager.GetHeapRegionEndAddress());
}
u8 Read8(const VAddr addr) {
return Read<u8>(addr);
}
@@ -406,24 +356,24 @@ void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_
const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
switch (page_table.attributes[page_index]) {
case PageType::Unmapped: {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, src_addr, size);
std::memset(dest_buffer, 0, copy_amount);
break;
}
case PageType::Memory: {
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
const u8* src_ptr = page_table.pointers[page_index] + page_offset;
std::memcpy(dest_buffer, src_ptr, copy_amount);
break;
}
case PageType::RasterizerCachedMemory: {
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
FlushMode::Flush);
std::memcpy(dest_buffer, GetPointerFromVMA(process, current_vaddr), copy_amount);
case Common::PageType::RasterizerCachedMemory: {
const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
std::memcpy(dest_buffer, host_ptr, copy_amount);
break;
}
default:
@@ -470,23 +420,23 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi
const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
switch (page_table.attributes[page_index]) {
case PageType::Unmapped: {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, dest_addr, size);
break;
}
case PageType::Memory: {
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
u8* dest_ptr = page_table.pointers[page_index] + page_offset;
std::memcpy(dest_ptr, src_buffer, copy_amount);
break;
}
case PageType::RasterizerCachedMemory: {
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
FlushMode::Invalidate);
std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount);
case Common::PageType::RasterizerCachedMemory: {
const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
std::memcpy(host_ptr, src_buffer, copy_amount);
break;
}
default:
@@ -516,23 +466,23 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std:
const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
switch (page_table.attributes[page_index]) {
case PageType::Unmapped: {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, dest_addr, size);
break;
}
case PageType::Memory: {
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
u8* dest_ptr = page_table.pointers[page_index] + page_offset;
std::memset(dest_ptr, 0, copy_amount);
break;
}
case PageType::RasterizerCachedMemory: {
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
FlushMode::Invalidate);
std::memset(GetPointerFromVMA(process, current_vaddr), 0, copy_amount);
case Common::PageType::RasterizerCachedMemory: {
const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
std::memset(host_ptr, 0, copy_amount);
break;
}
default:
@@ -558,23 +508,23 @@ void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr,
const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
switch (page_table.attributes[page_index]) {
case PageType::Unmapped: {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, src_addr, size);
ZeroBlock(process, dest_addr, copy_amount);
break;
}
case PageType::Memory: {
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
const u8* src_ptr = page_table.pointers[page_index] + page_offset;
WriteBlock(process, dest_addr, src_ptr, copy_amount);
break;
}
case PageType::RasterizerCachedMemory: {
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
FlushMode::Flush);
WriteBlock(process, dest_addr, GetPointerFromVMA(process, current_vaddr), copy_amount);
case Common::PageType::RasterizerCachedMemory: {
const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
WriteBlock(process, dest_addr, host_ptr, copy_amount);
break;
}
default:

View File

@@ -6,11 +6,11 @@
#include <cstddef>
#include <string>
#include <tuple>
#include <vector>
#include <boost/icl/interval_map.hpp>
#include "common/common_types.h"
#include "core/memory_hook.h"
namespace Common {
struct PageTable;
}
namespace Kernel {
class Process;
@@ -26,71 +26,6 @@ constexpr std::size_t PAGE_BITS = 12;
constexpr u64 PAGE_SIZE = 1ULL << PAGE_BITS;
constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
/// Classification of a single page; one value is stored per page in `PageTable::attributes`.
enum class PageType : u8 {
/// Page is unmapped and should cause an access error.
Unmapped,
/// Page is mapped to regular memory. This is the only type you can get pointers to.
Memory,
/// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
/// invalidation
RasterizerCachedMemory,
/// Page is mapped to an I/O region. Writing and reading to this page is handled by functions.
Special,
};
/// A hook attached to a range of addresses. Sets of these are the mapped values of
/// `PageTable::special_regions`.
struct SpecialRegion {
/// The kind of hook this region represents.
enum class Type {
DebugHook,
IODevice,
} type;
/// The hook object associated with this region.
MemoryHookPointer handler;
/// Orders regions by (type, handler); needed because regions are stored in a std::set.
bool operator<(const SpecialRegion& other) const {
return std::tie(type, handler) < std::tie(other.type, other.handler);
}
/// Two regions are equal when both their type and their handler compare equal.
bool operator==(const SpecialRegion& other) const {
return std::tie(type, handler) == std::tie(other.type, other.handler);
}
};
/**
* A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
* mimics the way a real CPU page table works.
*/
struct PageTable {
/// Creates a page table without sizing it; call Resize() before use.
explicit PageTable();
/// Creates a page table and sizes it for an address space of the given width in bits.
explicit PageTable(std::size_t address_space_width_in_bits);
~PageTable();
/**
* Resizes the page table to be able to accommodate enough pages within
* a given address space.
*
* @param address_space_width_in_bits The address size width in bits.
*/
void Resize(std::size_t address_space_width_in_bits);
/**
* Vector of memory pointers backing each page. An entry can only be non-null if the
* corresponding entry in the `attributes` vector is of type `Memory`.
*/
std::vector<u8*> pointers;
/**
* Contains MMIO handlers that back memory regions whose entries in the `attributes` vector are
* of type `Special`.
*/
boost::icl::interval_map<VAddr, std::set<SpecialRegion>> special_regions;
/**
* Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
* the corresponding entry in `pointers` MUST be set to null.
*/
std::vector<PageType> attributes;
};
/// Virtual user-space memory regions
enum : VAddr {
/// Read-only page containing kernel and system configuration values.
@@ -116,8 +51,8 @@ enum : VAddr {
};
/// Currently active page table
void SetCurrentPageTable(PageTable* page_table);
PageTable* GetCurrentPageTable();
void SetCurrentPageTable(Common::PageTable* page_table);
Common::PageTable* GetCurrentPageTable();
/// Determines if the given VAddr is valid for the specified process.
bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr);
@@ -161,10 +96,4 @@ enum class FlushMode {
*/
void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached);
/**
* Flushes and invalidates any externally cached rasterizer resources touching the given virtual
* address region.
*/
void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode);
} // namespace Memory

View File

@@ -5,7 +5,11 @@
#pragma once
#include "common/common_types.h"
#include "core/memory_hook.h"
#include "common/memory_hook.h"
namespace Common {
struct PageTable;
}
namespace Memory {
@@ -17,7 +21,7 @@ namespace Memory {
* @param size The amount of bytes to map. Must be page-aligned.
* @param target Buffer with the memory backing the mapping. Must be of length at least `size`.
*/
void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target);
void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target);
/**
* Maps a region of the emulated process address space as a IO region.
@@ -26,11 +30,14 @@ void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target);
* @param size The amount of bytes to map. Must be page-aligned.
* @param mmio_handler The handler that backs the mapping.
*/
void MapIoRegion(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer mmio_handler);
void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
Common::MemoryHookPointer mmio_handler);
void UnmapRegion(PageTable& page_table, VAddr base, u64 size);
void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size);
void AddDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook);
void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook);
void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
Common::MemoryHookPointer hook);
void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
Common::MemoryHookPointer hook);
} // namespace Memory

View File

@@ -24,17 +24,19 @@ namespace InputCommon::SDL {
class State {
public:
/// Unresisters SDL device factories and shut them down.
using Pollers = std::vector<std::unique_ptr<Polling::DevicePoller>>;
/// Unregisters SDL device factories and shut them down.
virtual ~State() = default;
virtual std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
InputCommon::Polling::DeviceType type) = 0;
virtual Pollers GetPollers(Polling::DeviceType type) = 0;
};
class NullState : public State {
public:
std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
InputCommon::Polling::DeviceType type) override {}
Pollers GetPollers(Polling::DeviceType type) override {
return {};
}
};
std::unique_ptr<State> Init();

View File

@@ -475,12 +475,11 @@ SDLState::SDLState() {
initialized = true;
if (start_thread) {
poll_thread = std::thread([&] {
poll_thread = std::thread([this] {
using namespace std::chrono_literals;
SDL_Event event;
while (initialized) {
SDL_PumpEvents();
std::this_thread::sleep_for(std::chrono::duration(10ms));
std::this_thread::sleep_for(10ms);
}
});
}
@@ -651,9 +650,9 @@ private:
};
} // namespace Polling
std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> SDLState::GetPollers(
InputCommon::Polling::DeviceType type) {
std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> pollers;
SDLState::Pollers SDLState::GetPollers(InputCommon::Polling::DeviceType type) {
Pollers pollers;
switch (type) {
case InputCommon::Polling::DeviceType::Analog:
pollers.emplace_back(std::make_unique<Polling::SDLAnalogPoller>(*this));
@@ -661,8 +660,9 @@ std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> SDLState::GetPo
case InputCommon::Polling::DeviceType::Button:
pollers.emplace_back(std::make_unique<Polling::SDLButtonPoller>(*this));
break;
return pollers;
}
return pollers;
}
} // namespace SDL

View File

@@ -25,7 +25,7 @@ public:
/// Initializes and registers SDL device factories
SDLState();
/// Unresisters SDL device factories and shut them down.
/// Unregisters SDL device factories and shut them down.
~SDLState() override;
/// Handle SDL_Events for joysticks from SDL_PollEvent
@@ -35,8 +35,7 @@ public:
std::shared_ptr<SDLJoystick> GetSDLJoystickByGUID(const std::string& guid, int port);
/// Get all DevicePoller that use the SDL backend for a specific device type
std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
InputCommon::Polling::DeviceType type) override;
Pollers GetPollers(Polling::DeviceType type) override;
/// Used by the Pollers during config
std::atomic<bool> polling = false;

View File

@@ -1,4 +1,5 @@
add_executable(tests
common/bit_field.cpp
common/param_package.cpp
common/ring_buffer.cpp
core/arm/arm_test_common.cpp

View File

@@ -0,0 +1,90 @@
// Copyright 2019 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <cstring>
#include <type_traits>
#include <catch2/catch.hpp>
#include "common/bit_field.h"
// Checks that BitField (backed by a little-endian u32) and BitFieldBE (backed by a big-endian
// u32) read and write the expected bits when the same four raw bytes are loaded into each, for
// unsigned, signed and enum-typed fields.
TEST_CASE("BitField", "[common]") {
// 8-bit patterns that are stored in the 8-bit wide field `c` below.
enum class TestEnum : u32 {
A = 0b10111101,
B = 0b10101110,
C = 0b00001111,
};
// Four adjacent fields covering all 32 bits: a = bits 0-5, b = bits 6-9 (signed),
// c = bits 10-17 (enum), d = bits 18-31.
union LEBitField {
u32_le raw;
BitField<0, 6, u32> a;
BitField<6, 4, s32> b;
BitField<10, 8, TestEnum> c;
BitField<18, 14, u32> d;
} le_bitfield;
// Same field layout, but on top of a big-endian raw value.
union BEBitField {
u32_be raw;
BitFieldBE<0, 6, u32> a;
BitFieldBE<6, 4, s32> b;
BitFieldBE<10, 8, TestEnum> c;
BitFieldBE<18, 14, u32> d;
} be_bitfield;
// The unions must stay exactly one u32 wide and trivially copyable, since the test moves
// them in and out of a byte array with memcpy.
static_assert(sizeof(LEBitField) == sizeof(u32));
static_assert(sizeof(BEBitField) == sizeof(u32));
static_assert(std::is_trivially_copyable_v<LEBitField>);
static_assert(std::is_trivially_copyable_v<BEBitField>);
// Raw bytes in memory order; both unions are loaded from these same bytes.
std::array<u8, 4> raw{{
0b01101100,
0b11110110,
0b10111010,
0b11101100,
}};
std::memcpy(&le_bitfield, &raw, sizeof(raw));
std::memcpy(&be_bitfield, &raw, sizeof(raw));
// Little-endian view: raw[0] is the least significant byte.
// bit fields: 11101100101110'10111101'1001'101100
REQUIRE(le_bitfield.raw == 0b11101100'10111010'11110110'01101100);
REQUIRE(le_bitfield.a == 0b101100);
REQUIRE(le_bitfield.b == -7); // 1001 as two's complement
REQUIRE(le_bitfield.c == TestEnum::A);
REQUIRE(le_bitfield.d == 0b11101100101110);
// Overwrite every field, then verify both the combined value and the bytes in memory.
le_bitfield.a.Assign(0b000111);
le_bitfield.b.Assign(-1);
le_bitfield.c.Assign(TestEnum::C);
le_bitfield.d.Assign(0b01010101010101);
std::memcpy(&raw, &le_bitfield, sizeof(raw));
// bit fields: 01010101010101'00001111'1111'000111
REQUIRE(le_bitfield.raw == 0b01010101'01010100'00111111'11000111);
// Least significant byte comes first in memory.
REQUIRE(raw == std::array<u8, 4>{{
0b11000111,
0b00111111,
0b01010100,
0b01010101,
}});
// Big-endian view of the original bytes: the first byte is the most significant one.
// (be_bitfield was loaded before `raw` was overwritten above.)
// bit fields: 01101100111101'10101110'1011'101100
REQUIRE(be_bitfield.raw == 0b01101100'11110110'10111010'11101100);
REQUIRE(be_bitfield.a == 0b101100);
REQUIRE(be_bitfield.b == -5); // 1011 as two's complement
REQUIRE(be_bitfield.c == TestEnum::B);
REQUIRE(be_bitfield.d == 0b01101100111101);
// Same assignments as the little-endian case; the combined value is identical but the bytes
// in memory come out in the opposite order.
be_bitfield.a.Assign(0b000111);
be_bitfield.b.Assign(-1);
be_bitfield.c.Assign(TestEnum::C);
be_bitfield.d.Assign(0b01010101010101);
std::memcpy(&raw, &be_bitfield, sizeof(raw));
// bit fields: 01010101010101'00001111'1111'000111
REQUIRE(be_bitfield.raw == 0b01010101'01010100'00111111'11000111);
// Most significant byte comes first in memory.
REQUIRE(raw == std::array<u8, 4>{{
0b01010101,
0b01010100,
0b00111111,
0b11000111,
}});
}

View File

@@ -4,6 +4,7 @@
#include <algorithm>
#include "common/page_table.h"
#include "core/core.h"
#include "core/hle/kernel/process.h"
#include "core/memory.h"
@@ -22,7 +23,7 @@ TestEnvironment::TestEnvironment(bool mutable_memory_)
std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
page_table->special_regions.clear();
std::fill(page_table->attributes.begin(), page_table->attributes.end(),
Memory::PageType::Unmapped);
Common::PageType::Unmapped);
Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory);
Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory);

View File

@@ -9,10 +9,10 @@
#include <vector>
#include "common/common_types.h"
#include "common/memory_hook.h"
#include "core/hle/kernel/kernel.h"
#include "core/memory_hook.h"
namespace Memory {
namespace Common {
struct PageTable;
}
@@ -58,7 +58,7 @@ public:
private:
friend struct TestMemory;
struct TestMemory final : Memory::MemoryHook {
struct TestMemory final : Common::MemoryHook {
explicit TestMemory(TestEnvironment* env_) : env(env_) {}
TestEnvironment* env;
@@ -86,7 +86,7 @@ private:
bool mutable_memory;
std::shared_ptr<TestMemory> test_memory;
std::vector<WriteRecord> write_records;
Memory::PageTable* page_table = nullptr;
Common::PageTable* page_table = nullptr;
Kernel::KernelCore kernel;
};

View File

@@ -123,6 +123,8 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_memory_manager.h
renderer_vulkan/vk_resource_manager.cpp
renderer_vulkan/vk_resource_manager.h
renderer_vulkan/vk_sampler_cache.cpp
renderer_vulkan/vk_sampler_cache.h
renderer_vulkan/vk_scheduler.cpp
renderer_vulkan/vk_scheduler.h
renderer_vulkan/vk_stream_buffer.cpp

View File

@@ -55,12 +55,9 @@ bool DmaPusher::Step() {
}
// Push buffer non-empty, read a word
const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
ASSERT_MSG(address, "Invalid GPU address");
command_headers.resize(command_list_header.size);
Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32));
gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
command_list_header.size * sizeof(u32));
for (const CommandHeader& command_header : command_headers) {

View File

@@ -9,7 +9,6 @@
#include "common/bit_field.h"
#include "common/common_types.h"
#include "video_core/memory_manager.h"
namespace Tegra {

View File

@@ -9,6 +9,7 @@
#include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
namespace Tegra::Engines {
@@ -40,17 +41,13 @@ void KeplerMemory::ProcessData(u32 data) {
ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
const GPUVAddr address = regs.dest.Address();
const auto dest_address =
memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
ASSERT_MSG(dest_address, "Invalid GPU address");
// We have to invalidate the destination region to evict any outdated surfaces from the cache.
// We do this before actually writing the new data because the destination address might contain
// a dirty surface that will have to be written back to memory.
Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32));
// We do this before actually writing the new data because the destination address might
// contain a dirty surface that will have to be written back to memory.
const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
memory_manager.Write<u32>(address, data);
Memory::Write32(*dest_address, data);
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
state.write_offset++;

View File

@@ -270,11 +270,9 @@ void Maxwell3D::ProcessMacroBind(u32 data) {
}
void Maxwell3D::ProcessQueryGet() {
GPUVAddr sequence_address = regs.query.QueryAddress();
const GPUVAddr sequence_address{regs.query.QueryAddress()};
// Since the sequence address is given as a GPU VAddr, we have to convert it to an application
// VAddr before writing.
const auto address = memory_manager.GpuToCpuAddress(sequence_address);
ASSERT_MSG(address, "Invalid GPU address");
// TODO(Subv): Support the other query units.
ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -309,7 +307,7 @@ void Maxwell3D::ProcessQueryGet() {
// Write the current query sequence to the sequence address.
// TODO(Subv): Find out what happens if you use a long query type but mark it as a short
// query.
Memory::Write32(*address, sequence);
memory_manager.Write<u32>(sequence_address, sequence);
} else {
// Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
// GPU, this command may actually take a while to complete in real hardware due to GPU
@@ -318,7 +316,7 @@ void Maxwell3D::ProcessQueryGet() {
query_result.value = result;
// TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
query_result.timestamp = system.CoreTiming().GetTicks();
Memory::WriteBlock(*address, &query_result, sizeof(query_result));
memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
}
dirty_flags.OnMemoryWrite();
break;
@@ -393,10 +391,12 @@ void Maxwell3D::ProcessCBData(u32 value) {
// Don't allow writing past the end of the buffer.
ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
ASSERT_MSG(address, "Invalid GPU address");
const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
u8* ptr{memory_manager.GetPointer(address)};
rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
memory_manager.Write<u32>(address, value);
Memory::Write32(*address, value);
dirty_flags.OnMemoryWrite();
// Increment the current buffer position.
@@ -404,14 +404,10 @@ void Maxwell3D::ProcessCBData(u32 value) {
}
Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
const GPUVAddr tic_base_address = regs.tic.TICAddress();
const GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
const auto tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
ASSERT_MSG(tic_address_cpu, "Invalid GPU address");
const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
Texture::TICEntry tic_entry;
Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
@@ -429,14 +425,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
}
Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
const GPUVAddr tsc_base_address = regs.tsc.TSCAddress();
const GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
const auto tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
ASSERT_MSG(tsc_address_cpu, "Invalid GPU address");
const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
Texture::TSCEntry tsc_entry;
Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry));
memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
return tsc_entry;
}
@@ -455,10 +447,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
const auto address = memory_manager.GpuToCpuAddress(current_texture);
ASSERT_MSG(address, "Invalid GPU address");
const Texture::TextureHandle tex_handle{Memory::Read32(*address)};
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(current_texture)};
Texture::FullTextureInfo tex_info{};
// TODO(Subv): Use the shader to determine which textures are actually accessed.
@@ -493,10 +482,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
const auto tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
ASSERT_MSG(tex_address_cpu, "Invalid GPU address");
const Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
Texture::FullTextureInfo tex_info{};
tex_info.index = static_cast<u32>(offset);

View File

@@ -9,6 +9,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Engines {
@@ -42,11 +43,6 @@ void MaxwellDMA::HandleCopy() {
const GPUVAddr source = regs.src_address.Address();
const GPUVAddr dest = regs.dst_address.Address();
const auto source_cpu = memory_manager.GpuToCpuAddress(source);
const auto dest_cpu = memory_manager.GpuToCpuAddress(dest);
ASSERT_MSG(source_cpu, "Invalid source GPU address");
ASSERT_MSG(dest_cpu, "Invalid destination GPU address");
// TODO(Subv): Perform more research and implement all features of this engine.
ASSERT(regs.exec.enable_swizzle == 0);
ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
@@ -69,7 +65,7 @@ void MaxwellDMA::HandleCopy() {
// buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
// y_count).
if (!regs.exec.enable_2d) {
Memory::CopyBlock(*dest_cpu, *source_cpu, regs.x_count);
memory_manager.CopyBlock(dest, source, regs.x_count);
return;
}
@@ -78,9 +74,9 @@ void MaxwellDMA::HandleCopy() {
// rectangle. There is no need to manually flush/invalidate the regions because
// CopyBlock does that for us.
for (u32 line = 0; line < regs.y_count; ++line) {
const VAddr source_line = *source_cpu + line * regs.src_pitch;
const VAddr dest_line = *dest_cpu + line * regs.dst_pitch;
Memory::CopyBlock(dest_line, source_line, regs.x_count);
const GPUVAddr source_line = source + line * regs.src_pitch;
const GPUVAddr dest_line = dest + line * regs.dst_pitch;
memory_manager.CopyBlock(dest_line, source_line, regs.x_count);
}
return;
}
@@ -89,15 +85,28 @@ void MaxwellDMA::HandleCopy() {
const std::size_t copy_size = regs.x_count * regs.y_count;
auto source_ptr{memory_manager.GetPointer(source)};
auto dst_ptr{memory_manager.GetPointer(dest)};
if (!source_ptr) {
LOG_ERROR(HW_GPU, "source_ptr is invalid");
return;
}
if (!dst_ptr) {
LOG_ERROR(HW_GPU, "dst_ptr is invalid");
return;
}
const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
// TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
// copying.
Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size);
rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
// We have to invalidate the destination region to evict any outdated surfaces from the
// cache. We do this before actually writing the new data because the destination address
// might contain a dirty surface that will have to be written back to memory.
Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size);
rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
};
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
@@ -110,8 +119,8 @@ void MaxwellDMA::HandleCopy() {
copy_size * src_bytes_per_pixel);
Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
regs.src_params.size_x, src_bytes_per_pixel, *source_cpu,
*dest_cpu, regs.src_params.BlockHeight(), regs.src_params.pos_x,
regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
regs.src_params.BlockHeight(), regs.src_params.pos_x,
regs.src_params.pos_y);
} else {
ASSERT(regs.dst_params.size_z == 1);
@@ -124,7 +133,7 @@ void MaxwellDMA::HandleCopy() {
// If the input is linear and the output is tiled, swizzle the input and copy it over.
Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
src_bpp, *dest_cpu, *source_cpu, regs.dst_params.BlockHeight());
src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
}
}

View File

@@ -12,6 +12,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_base.h"
namespace Tegra {
@@ -274,7 +275,6 @@ void GPU::ProcessSemaphoreTriggerMethod() {
const auto op =
static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
if (op == GpuSemaphoreOperation::WriteLong) {
auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
struct Block {
u32 sequence;
u32 zeros = 0;
@@ -286,11 +286,9 @@ void GPU::ProcessSemaphoreTriggerMethod() {
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming
block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
Memory::WriteBlock(*address, &block, sizeof(block));
memory_manager->WriteBlock(regs.smaphore_address.SmaphoreAddress(), &block, sizeof(block));
} else {
const auto address =
memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
const u32 word = Memory::Read32(*address);
const u32 word{memory_manager->Read<u32>(regs.smaphore_address.SmaphoreAddress())};
if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
(op == GpuSemaphoreOperation::AcquireGequal &&
static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
@@ -317,13 +315,11 @@ void GPU::ProcessSemaphoreTriggerMethod() {
}
void GPU::ProcessSemaphoreRelease() {
const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
Memory::Write32(*address, regs.semaphore_release);
memory_manager->Write<u32>(regs.smaphore_address.SmaphoreAddress(), regs.semaphore_release);
}
void GPU::ProcessSemaphoreAcquire() {
const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
const u32 word = Memory::Read32(*address);
const u32 word = memory_manager->Read<u32>(regs.smaphore_address.SmaphoreAddress());
const auto value = regs.semaphore_acquire;
if (word != value) {
regs.acquire_active = true;

View File

@@ -9,7 +9,11 @@
#include "common/common_types.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
#include "video_core/dma_pusher.h"
#include "video_core/memory_manager.h"
/// Integral form of a host pointer; the address type taken by the cache flush/invalidate calls.
using CacheAddr = std::uintptr_t;

/// Converts a host pointer into its integral cache address (a null pointer maps to 0).
inline CacheAddr ToCacheAddr(const void* host_ptr) {
    const auto cache_addr = reinterpret_cast<CacheAddr>(host_ptr);
    return cache_addr;
}
namespace Core {
class System;
@@ -119,6 +123,8 @@ enum class EngineID {
MAXWELL_DMA_COPY_A = 0xB0B5,
};
class MemoryManager;
class GPU {
public:
explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
@@ -209,13 +215,13 @@ public:
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
virtual void FlushRegion(VAddr addr, u64 size) = 0;
virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be invalidated
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
private:
void ProcessBindMethod(const MethodCall& method_call);
@@ -239,9 +245,8 @@ protected:
private:
std::unique_ptr<Tegra::MemoryManager> memory_manager;
/// Mapping of command subchannels to their bound engine ids.
/// Mapping of command subchannels to their bound engine ids
std::array<EngineID, 8> bound_engines = {};
/// 3D engine
std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
/// 2D engine

View File

@@ -22,15 +22,15 @@ void GPUAsynch::SwapBuffers(
gpu_thread.SwapBuffers(std::move(framebuffer));
}
void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
/// Forwards a flush request for the cache region [addr, addr + size) to the GPU thread manager.
void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
gpu_thread.FlushRegion(addr, size);
}
void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
/// Forwards an invalidate request for the cache region [addr, addr + size) to the GPU thread
/// manager.
void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) {
gpu_thread.InvalidateRegion(addr, size);
}
void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
/// Forwards a combined flush-and-invalidate request for the cache region to the GPU thread
/// manager.
void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
gpu_thread.FlushAndInvalidateRegion(addr, size);
}

View File

@@ -26,9 +26,9 @@ public:
void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
private:
GPUThread::ThreadManager gpu_thread;

View File

@@ -22,15 +22,15 @@ void GPUSynch::SwapBuffers(
renderer.SwapBuffers(std::move(framebuffer));
}
void GPUSynch::FlushRegion(VAddr addr, u64 size) {
/// Flushes the cache region [addr, addr + size) by calling the rasterizer directly on the
/// calling thread.
void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
renderer.Rasterizer().FlushRegion(addr, size);
}
void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
/// Invalidates the cache region [addr, addr + size) by calling the rasterizer directly on the
/// calling thread.
void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) {
renderer.Rasterizer().InvalidateRegion(addr, size);
}
void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
/// Flushes and invalidates the cache region [addr, addr + size) by calling the rasterizer
/// directly on the calling thread.
void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
}

View File

@@ -21,9 +21,9 @@ public:
void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
};
} // namespace VideoCommon

View File

@@ -5,7 +5,6 @@
#include "common/assert.h"
#include "common/microprofile.h"
#include "core/frontend/scope_acquire_window_context.h"
#include "core/settings.h"
#include "video_core/dma_pusher.h"
#include "video_core/gpu.h"
#include "video_core/gpu_thread.h"
@@ -13,38 +12,13 @@
namespace VideoCommon::GPUThread {
/// Executes a single GPU thread command
static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer,
Tegra::DmaPusher& dma_pusher) {
if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
dma_pusher.Push(std::move(submit_list->entries));
dma_pusher.DispatchCalls();
} else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
renderer.SwapBuffers(data->framebuffer);
} else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
renderer.Rasterizer().FlushRegion(data->addr, data->size);
} else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
} else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size);
} else {
UNREACHABLE();
}
}
/// Runs the GPU thread
static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
SynchState& state) {
MicroProfileOnThreadCreate("GpuThread");
auto WaitForWakeup = [&]() {
std::unique_lock<std::mutex> lock{state.signal_mutex};
state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; });
};
// Wait for first GPU command before acquiring the window context
WaitForWakeup();
state.WaitForCommands();
// If emulation was stopped during disk shader loading, abort before trying to acquire context
if (!state.is_running) {
@@ -53,100 +27,72 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
CommandDataContainer next;
while (state.is_running) {
if (!state.is_running) {
return;
state.WaitForCommands();
while (!state.queue.Empty()) {
state.queue.Pop(next);
if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
dma_pusher.Push(std::move(submit_list->entries));
dma_pusher.DispatchCalls();
} else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
state.DecrementFramesCounter();
renderer.SwapBuffers(std::move(data->framebuffer));
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
renderer.Rasterizer().FlushRegion(data->addr, data->size);
} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
} else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) {
return;
} else {
UNREACHABLE();
}
}
{
// Thread has been woken up, so make the previous write queue the next read queue
std::lock_guard<std::mutex> lock{state.signal_mutex};
std::swap(state.push_queue, state.pop_queue);
}
// Execute all of the GPU commands
while (!state.pop_queue->empty()) {
ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher);
state.pop_queue->pop();
}
state.UpdateIdleState();
// Signal that the GPU thread has finished processing commands
if (state.is_idle) {
state.idle_condition.notify_one();
}
// Wait for CPU thread to send more GPU commands
WaitForWakeup();
}
}
ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
: renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
std::ref(dma_pusher), std::ref(state)},
thread_id{thread.get_id()} {}
std::ref(dma_pusher), std::ref(state)} {}
ThreadManager::~ThreadManager() {
{
// Notify GPU thread that a shutdown is pending
std::lock_guard<std::mutex> lock{state.signal_mutex};
state.is_running = false;
}
state.signal_condition.notify_one();
// Notify GPU thread that a shutdown is pending
PushCommand(EndProcessingCommand());
thread.join();
}
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
if (entries.empty()) {
return;
}
PushCommand(SubmitListCommand(std::move(entries)), false, false);
PushCommand(SubmitListCommand(std::move(entries)));
}
void ThreadManager::SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false);
state.IncrementFramesCounter();
PushCommand(SwapBuffersCommand(std::move(framebuffer)));
state.WaitForFrames();
}
void ThreadManager::FlushRegion(VAddr addr, u64 size) {
// Block the CPU when using accurate emulation
PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false);
/// Queues a FlushRegionCommand for the region [addr, addr + size); this call does not wait for
/// the command to be executed.
void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
PushCommand(FlushRegionCommand(addr, size));
}
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
PushCommand(InvalidateRegionCommand(addr, size), true, true);
/// Invalidates the cache region [addr, addr + size).
/// If commands are still queued, the invalidation is queued behind them; otherwise it is
/// performed immediately on the calling thread.
void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
    if (!state.queue.Empty()) {
        PushCommand(InvalidateRegionCommand(addr, size));
        return;
    }

    // It's quicker to invalidate a single region on the CPU if the queue is already empty
    // NOTE(review): an empty queue does not by itself show that the GPU thread has finished
    // executing the command it popped last - confirm this direct call cannot overlap with it.
    renderer.Rasterizer().InvalidateRegion(addr, size);
}
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
/// Handles a flush-and-invalidate request for the region [addr, addr + size).
/// Only the invalidation half is performed; it is delegated to InvalidateRegion.
void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
// Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
InvalidateRegion(addr, size);
}
void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {
{
std::lock_guard<std::mutex> lock{state.signal_mutex};
if ((allow_on_cpu && state.is_idle) || IsGpuThread()) {
// Execute the command synchronously on the current thread
ExecuteCommand(&command_data, renderer, dma_pusher);
return;
}
// Push the command to the GPU thread
state.UpdateIdleState();
state.push_queue->emplace(command_data);
}
// Signal the GPU thread that commands are pending
state.signal_condition.notify_one();
if (wait_for_idle) {
// Wait for the GPU to be idle (all commands to be executed)
std::unique_lock<std::mutex> lock{state.idle_mutex};
state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); });
}
/// Wraps the command in a queue container, enqueues it, then signals the command condition
/// through SynchState::SignalCommands.
void ThreadManager::PushCommand(CommandData&& command_data) {
    CommandDataContainer container(std::move(command_data));
    state.queue.Push(std::move(container));
    state.SignalCommands();
}
} // namespace VideoCommon::GPUThread

View File

@@ -13,6 +13,9 @@
#include <thread>
#include <variant>
#include "common/threadsafe_queue.h"
#include "video_core/gpu.h"
namespace Tegra {
struct FramebufferConfig;
class DmaPusher;
@@ -24,6 +27,9 @@ class RendererBase;
namespace VideoCommon::GPUThread {
/// Command to signal to the GPU thread that processing has ended
struct EndProcessingCommand final {};
/// Command to signal to the GPU thread that a command list is ready for processing
struct SubmitListCommand final {
explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
@@ -36,59 +42,110 @@ struct SwapBuffersCommand final {
explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
: framebuffer{std::move(framebuffer)} {}
std::optional<const Tegra::FramebufferConfig> framebuffer;
std::optional<Tegra::FramebufferConfig> framebuffer;
};
/// Command to signal to the GPU thread to flush a region
struct FlushRegionCommand final {
explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
const VAddr addr;
const u64 size;
CacheAddr addr;
u64 size;
};
/// Command to signal to the GPU thread to invalidate a region
struct InvalidateRegionCommand final {
explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
const VAddr addr;
const u64 size;
CacheAddr addr;
u64 size;
};
/// Command to signal to the GPU thread to flush and invalidate a region
struct FlushAndInvalidateRegionCommand final {
explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
: addr{addr}, size{size} {}
const VAddr addr;
const u64 size;
CacheAddr addr;
u64 size;
};
using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
using CommandData =
std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
/// Wrapper that lets a CommandData variant be stored in the command queue.
///
/// The previous hand-written copy-assignment applied std::move to a const reference, which
/// silently performs a copy, and its presence suppressed the implicit move operations. Every
/// command (including whole command lists) was therefore deep-copied when pushed or popped.
/// All special members are now defaulted so rvalues are actually moved; copying still works as
/// before.
struct CommandDataContainer {
    CommandDataContainer() = default;

    /// Takes ownership of the given command.
    CommandDataContainer(CommandData&& data) : data{std::move(data)} {}

    CommandDataContainer(const CommandDataContainer&) = default;
    CommandDataContainer(CommandDataContainer&&) = default;
    CommandDataContainer& operator=(const CommandDataContainer&) = default;
    CommandDataContainer& operator=(CommandDataContainer&&) = default;

    CommandData data;
};
/// Struct used to synchronize the GPU thread
struct SynchState final {
std::atomic<bool> is_running{true};
std::atomic<bool> is_idle{true};
std::condition_variable signal_condition;
std::mutex signal_mutex;
std::condition_variable idle_condition;
std::mutex idle_mutex;
std::atomic_bool is_running{true};
std::atomic_int queued_frame_count{};
std::mutex frames_mutex;
std::mutex commands_mutex;
std::condition_variable commands_condition;
std::condition_variable frames_condition;
// We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
// one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
// empty. This allows for efficient thread-safe access, as it does not require any copies.
using CommandQueue = std::queue<CommandData>;
std::array<CommandQueue, 2> command_queues;
CommandQueue* push_queue{&command_queues[0]};
CommandQueue* pop_queue{&command_queues[1]};
void UpdateIdleState() {
std::lock_guard<std::mutex> lock{idle_mutex};
is_idle = command_queues[0].empty() && command_queues[1].empty();
/// Records one more queued frame. The counter is updated while holding frames_mutex.
void IncrementFramesCounter() {
    std::lock_guard<std::mutex> guard{frames_mutex};
    queued_frame_count.fetch_add(1);
}
/// Records that one queued frame is done and, once the counter reaches zero, wakes one thread
/// waiting on frames_condition. The notification is issued after frames_mutex is released.
void DecrementFramesCounter() {
    bool frames_remaining;
    {
        std::lock_guard<std::mutex> guard{frames_mutex};
        --queued_frame_count;
        frames_remaining = queued_frame_count != 0;
    }

    if (!frames_remaining) {
        frames_condition.notify_one();
    }
}
/// Blocks the caller until queued_frame_count is zero; returns immediately if it already is.
void WaitForFrames() {
    std::unique_lock<std::mutex> guard{frames_mutex};
    // The predicate is evaluated before blocking, so no wait occurs when nothing is queued.
    frames_condition.wait(guard, [this] { return queued_frame_count == 0; });
}
/// Wakes one thread waiting on commands_condition, but only if the queue holds a command.
/// The emptiness check is made under commands_mutex; the notification happens after unlocking.
void SignalCommands() {
    bool has_pending_commands;
    {
        std::unique_lock<std::mutex> guard{commands_mutex};
        has_pending_commands = !queue.Empty();
    }

    if (has_pending_commands) {
        commands_condition.notify_one();
    }
}
/// Blocks the caller until the command queue is non-empty.
void WaitForCommands() {
    std::unique_lock<std::mutex> guard{commands_mutex};
    // Loop form of a predicate wait: re-check after every wakeup to absorb spurious ones.
    while (queue.Empty()) {
        commands_condition.wait(guard);
    }
}
using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
CommandQueue queue;
};
/// Class used to manage the GPU thread
@@ -105,22 +162,17 @@ public:
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
void FlushRegion(VAddr addr, u64 size);
void FlushRegion(CacheAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be invalidated
void InvalidateRegion(VAddr addr, u64 size);
void InvalidateRegion(CacheAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(VAddr addr, u64 size);
void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
private:
/// Pushes a command to be executed by the GPU thread
void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
/// Returns true if this is called by the GPU thread
bool IsGpuThread() const {
return std::this_thread::get_id() == thread_id;
}
void PushCommand(CommandData&& command_data);
private:
SynchState state;

View File

@@ -5,181 +5,446 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
namespace Tegra {
MemoryManager::MemoryManager() {
// Mark the first page as reserved, so that 0 is not a valid GPUVAddr. Otherwise, games might
// try to use 0 as a valid address, which is also used to mean nullptr. This fixes a bug with
// Undertale using 0 for a render target.
PageSlot(0) = static_cast<u64>(PageStatus::Reserved);
std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
std::fill(page_table.attributes.begin(), page_table.attributes.end(),
Common::PageType::Unmapped);
page_table.Resize(address_space_width);
// Initialize the map with a single free region covering the entire managed space.
VirtualMemoryArea initial_vma;
initial_vma.size = address_space_end;
vma_map.emplace(initial_vma.base, initial_vma);
UpdatePageTableForVMA(initial_vma);
}
GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)};
const u64 aligned_size{Common::AlignUp(size, page_size)};
const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
AllocateMemory(gpu_addr, 0, aligned_size);
for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
VAddr& slot{PageSlot(*gpu_addr + offset)};
ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
slot = static_cast<u64>(PageStatus::Allocated);
}
return *gpu_addr;
return gpu_addr;
}
GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
VAddr& slot{PageSlot(gpu_addr + offset)};
const u64 aligned_size{Common::AlignUp(size, page_size)};
ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
slot = static_cast<u64>(PageStatus::Allocated);
}
AllocateMemory(gpu_addr, 0, aligned_size);
return gpu_addr;
}
GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, PAGE_SIZE, PageStatus::Unmapped)};
const u64 aligned_size{Common::AlignUp(size, page_size)};
const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
VAddr& slot{PageSlot(*gpu_addr + offset)};
ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
slot = cpu_addr + offset;
}
const MappedRegion region{cpu_addr, *gpu_addr, size};
mapped_regions.push_back(region);
return *gpu_addr;
return gpu_addr;
}
GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
ASSERT((gpu_addr & PAGE_MASK) == 0);
ASSERT((gpu_addr & page_mask) == 0);
if (PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Allocated)) {
// Page has been already mapped. In this case, we must find a new area of memory to use that
// is different than the specified one. Super Mario Odyssey hits this scenario when changing
// areas, but we do not want to overwrite the old pages.
// TODO(bunnei): We need to write a hardware test to confirm this behavior.
const u64 aligned_size{Common::AlignUp(size, page_size)};
LOG_ERROR(HW_GPU, "attempting to map addr 0x{:016X}, which is not available!", gpu_addr);
const std::optional<GPUVAddr> new_gpu_addr{
FindFreeBlock(gpu_addr, size, PAGE_SIZE, PageStatus::Allocated)};
ASSERT_MSG(new_gpu_addr, "unable to find available GPU memory");
gpu_addr = *new_gpu_addr;
}
for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
VAddr& slot{PageSlot(gpu_addr + offset)};
ASSERT(slot == static_cast<u64>(PageStatus::Allocated));
slot = cpu_addr + offset;
}
const MappedRegion region{cpu_addr, gpu_addr, size};
mapped_regions.push_back(region);
MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
return gpu_addr;
}
GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
ASSERT((gpu_addr & PAGE_MASK) == 0);
ASSERT((gpu_addr & page_mask) == 0);
for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
VAddr& slot{PageSlot(gpu_addr + offset)};
const u64 aligned_size{Common::AlignUp(size, page_size)};
const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
ASSERT(slot != static_cast<u64>(PageStatus::Allocated) &&
slot != static_cast<u64>(PageStatus::Unmapped));
Core::System::GetInstance().Renderer().Rasterizer().FlushAndInvalidateRegion(cache_addr,
aligned_size);
UnmapRange(gpu_addr, aligned_size);
slot = static_cast<u64>(PageStatus::Unmapped);
}
// Delete the region mappings that are contained within the unmapped region
mapped_regions.erase(std::remove_if(mapped_regions.begin(), mapped_regions.end(),
[&](const MappedRegion& region) {
return region.gpu_addr <= gpu_addr &&
region.gpu_addr + region.size < gpu_addr + size;
}),
mapped_regions.end());
return gpu_addr;
}
GPUVAddr MemoryManager::GetRegionEnd(GPUVAddr region_start) const {
for (const auto& region : mapped_regions) {
const GPUVAddr region_end{region.gpu_addr + region.size};
if (region_start >= region.gpu_addr && region_start < region_end) {
return region_end;
GPUVAddr MemoryManager::FindFreeRegion(GPUVAddr region_start, u64 size) {
// Find the first Free VMA.
const VMAHandle vma_handle{std::find_if(vma_map.begin(), vma_map.end(), [&](const auto& vma) {
if (vma.second.type != VirtualMemoryArea::Type::Unmapped) {
return false;
}
}
return {};
}
std::optional<GPUVAddr> MemoryManager::FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
PageStatus status) {
GPUVAddr gpu_addr{region_start};
u64 free_space{};
align = (align + PAGE_MASK) & ~PAGE_MASK;
const VAddr vma_end{vma.second.base + vma.second.size};
return vma_end > region_start && vma_end >= region_start + size;
})};
while (gpu_addr + free_space < MAX_ADDRESS) {
if (PageSlot(gpu_addr + free_space) == static_cast<u64>(status)) {
free_space += PAGE_SIZE;
if (free_space >= size) {
return gpu_addr;
}
} else {
gpu_addr += free_space + PAGE_SIZE;
free_space = 0;
gpu_addr = Common::AlignUp(gpu_addr, align);
}
}
return {};
}
std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
const VAddr base_addr{PageSlot(gpu_addr)};
if (base_addr == static_cast<u64>(PageStatus::Allocated) ||
base_addr == static_cast<u64>(PageStatus::Unmapped) ||
base_addr == static_cast<u64>(PageStatus::Reserved)) {
if (vma_handle == vma_map.end()) {
return {};
}
return base_addr + (gpu_addr & PAGE_MASK);
return std::max(region_start, vma_handle->second.base);
}
std::vector<GPUVAddr> MemoryManager::CpuToGpuAddress(VAddr cpu_addr) const {
std::vector<GPUVAddr> results;
for (const auto& region : mapped_regions) {
if (cpu_addr >= region.cpu_addr && cpu_addr < (region.cpu_addr + region.size)) {
const u64 offset{cpu_addr - region.cpu_addr};
results.push_back(region.gpu_addr + offset);
/// Returns true when the page containing addr has an entry in the page table, i.e. its page
/// index is below the number of page-table pointers. It does not tell whether that page is mapped.
bool MemoryManager::IsAddressValid(GPUVAddr addr) const {
    const auto page_index = addr >> page_bits;
    return page_index < page_table.pointers.size();
}
/// Translates a GPU virtual address into the CPU address recorded as backing its page.
/// Returns an empty optional when the address is outside the page table or when the page has no
/// backing address recorded (a backing address of 0 is treated as "none").
std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) {
    if (!IsAddressValid(addr)) {
        return std::nullopt;
    }

    const VAddr page_backing_addr{page_table.backing_addr[addr >> page_bits]};
    if (page_backing_addr == 0) {
        return std::nullopt;
    }

    // Re-apply the offset of addr within its page.
    return page_backing_addr + (addr & page_mask);
}
/// Reads a value of type T from the GPU virtual address addr.
/// Returns a zero value when the address lies outside the page table or the page is unmapped
/// (the latter is logged as an error).
template <typename T>
T MemoryManager::Read(GPUVAddr addr) {
    if (!IsAddressValid(addr)) {
        return {};
    }

    const auto page_index = addr >> page_bits;

    if (const u8* const page_pointer = page_table.pointers[page_index]; page_pointer) {
        // NOTE: Avoid adding any extra logic to this fast-path block
        T value;
        std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T));
        return value;
    }

    // Slow path: the page has no host pointer, so report according to its attribute.
    const auto page_type = page_table.attributes[page_index];
    if (page_type == Common::PageType::Unmapped) {
        LOG_ERROR(HW_GPU, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, addr);
        return 0;
    }
    if (page_type == Common::PageType::Memory) {
        ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr);
    } else {
        UNREACHABLE();
    }
    return {};
}
/// Writes a value of type T to the GPU virtual address addr.
/// The write is dropped when the address lies outside the page table or the page is unmapped
/// (the latter is logged as an error).
template <typename T>
void MemoryManager::Write(GPUVAddr addr, T data) {
    if (!IsAddressValid(addr)) {
        return;
    }

    const auto page_index = addr >> page_bits;

    if (u8* const page_pointer = page_table.pointers[page_index]; page_pointer) {
        // NOTE: Avoid adding any extra logic to this fast-path block
        std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T));
        return;
    }

    // Slow path: the page has no host pointer, so report according to its attribute.
    const auto page_type = page_table.attributes[page_index];
    if (page_type == Common::PageType::Unmapped) {
        LOG_ERROR(HW_GPU, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
                  static_cast<u32>(data), addr);
        return;
    }
    if (page_type == Common::PageType::Memory) {
        ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr);
    } else {
        UNREACHABLE();
    }
}
// Explicit instantiations of Read/Write for the 8/16/32/64-bit unsigned integer types, so the
// template definitions in this translation unit are emitted for these types.
template u8 MemoryManager::Read<u8>(GPUVAddr addr);
template u16 MemoryManager::Read<u16>(GPUVAddr addr);
template u32 MemoryManager::Read<u32>(GPUVAddr addr);
template u64 MemoryManager::Read<u64>(GPUVAddr addr);
template void MemoryManager::Write<u8>(GPUVAddr addr, u8 data);
template void MemoryManager::Write<u16>(GPUVAddr addr, u16 data);
template void MemoryManager::Write<u32>(GPUVAddr addr, u32 data);
template void MemoryManager::Write<u64>(GPUVAddr addr, u64 data);
/// Returns the host pointer that corresponds to the given GPU virtual address, or nullptr when
/// the address is out of range or its page has no host pointer.
u8* MemoryManager::GetPointer(GPUVAddr addr) {
    if (!IsAddressValid(addr)) {
        return nullptr;
    }

    if (u8* const host_page = page_table.pointers[addr >> page_bits]; host_page != nullptr) {
        return host_page + (addr & page_mask);
    }

    LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
    return nullptr;
}
/// Copies `size` bytes starting at GPU address `src_addr` into `dest_buffer`.
/// Nothing is copied (and `dest_buffer` is left untouched) when GetPointer() cannot resolve the
/// source address.
/// NOTE(review): only the first byte's address is translated; the range is presumably expected
/// to be contiguous in host memory - confirm against callers.
void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) {
    const u8* const src_ptr{GetPointer(src_addr)};
    if (src_ptr == nullptr) {
        // Passing a null pointer to std::memcpy is undefined behaviour, even for a zero size.
        return;
    }
    std::memcpy(dest_buffer, src_ptr, size);
}
/// Copies `size` bytes from `src_buffer` to the memory behind GPU address `dest_addr`.
/// Nothing is written when GetPointer() cannot resolve the destination address.
/// NOTE(review): only the first byte's address is translated; the range is presumably expected
/// to be contiguous in host memory - confirm against callers.
void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
    u8* const dest_ptr{GetPointer(dest_addr)};
    if (dest_ptr == nullptr) {
        // Passing a null pointer to std::memcpy is undefined behaviour, even for a zero size.
        return;
    }
    std::memcpy(dest_ptr, src_buffer, size);
}
/// Copies `size` bytes from GPU address `src_addr` to GPU address `dest_addr`.
/// Nothing is copied when GetPointer() cannot resolve either address.
/// NOTE(review): std::memcpy requires non-overlapping ranges, and only the first byte of each
/// range is translated - confirm callers never pass overlapping or non-contiguous ranges.
void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
    // Resolve the destination first, then the source, matching the order of the original
    // single-expression call as written.
    u8* const dest_ptr{GetPointer(dest_addr)};
    const u8* const src_ptr{GetPointer(src_addr)};
    if (dest_ptr == nullptr || src_ptr == nullptr) {
        // Passing a null pointer to std::memcpy is undefined behaviour, even for a zero size.
        return;
    }
    std::memcpy(dest_ptr, src_ptr, size);
}
/// Updates the page table entries for the pages [base, base + size).
/// `base` and `size` are expressed in pages, not bytes: they index the page table vectors
/// directly and are multiplied by page_size only for the debug log.
/// Every page in the range receives attribute `type`. When `memory` is null, every page gets a
/// null host pointer and the same `backing_addr`; otherwise consecutive pages get consecutive
/// host pointers and backing addresses, each advancing by page_size.
void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
VAddr backing_addr) {
LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,
(base + size) * page_size);
// One-past-the-last page index of the range.
const VAddr end{base + size};
ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
base + page_table.pointers.size());
std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);
if (memory == nullptr) {
// No host memory: clear the pointers and store the same backing address in every page.
std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
std::fill(page_table.backing_addr.begin() + base, page_table.backing_addr.begin() + end,
backing_addr);
} else {
// Host memory present: walk the range one page at a time.
while (base != end) {
page_table.pointers[base] = memory;
page_table.backing_addr[base] = backing_addr;
base += 1;
memory += page_size;
backing_addr += page_size;
}
}
// NOTE(review): `results` is not declared in this function and the function returns void; this
// line looks like residue from the removed side of the diff view - confirm and drop.
return results;
}
VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) {
auto& block{page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK]};
if (!block) {
block = std::make_unique<PageBlock>();
block->fill(static_cast<VAddr>(PageStatus::Unmapped));
/// Maps the byte range [base, base + size) as memory pages backed by `target`.
/// Both `base` and `size` must be page aligned; they are converted to page units for MapPages().
void MemoryManager::MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr) {
    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
    ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);

    const u64 first_page{base / page_size};
    const u64 page_count{size / page_size};
    MapPages(first_page, page_count, target, Common::PageType::Memory, backing_addr);
}
/// Marks the byte range [base, base + size) as unmapped in the page table.
/// Both `base` and `size` must be page aligned; they are converted to page units for MapPages().
void MemoryManager::UnmapRegion(GPUVAddr base, u64 size) {
    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size);
    ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base);

    const u64 first_page{base / page_size};
    const u64 page_count{size / page_size};
    MapPages(first_page, page_count, nullptr, Common::PageType::Unmapped);
}
bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
ASSERT(base + size == next.base);
if (type != next.type) {
return {};
}
if (type == VirtualMemoryArea::Type::Allocated && (offset + size != next.offset)) {
return {};
}
if (type == VirtualMemoryArea::Type::Mapped && backing_memory + size != next.backing_memory) {
return {};
}
return true;
}
/// Looks up the area whose key is the greatest one not above `target`.
/// Addresses at or beyond address_space_end yield `vma_map.end()`.
MemoryManager::VMAHandle MemoryManager::FindVMA(GPUVAddr target) const {
    if (target < address_space_end) {
        // upper_bound() gives the first area starting after target; step back one entry.
        return std::prev(vma_map.upper_bound(target));
    }
    return vma_map.end();
}
/// Turns the given area into an Allocated one with no backing, refreshes its page table entries
/// and merges it with its neighbours where possible.
/// @returns the iterator produced by MergeAdjacent().
MemoryManager::VMAIter MemoryManager::Allocate(VMAIter vma_handle) {
    auto& area = vma_handle->second;
    area.backing_memory = nullptr;
    area.backing_addr = 0;
    area.type = VirtualMemoryArea::Type::Allocated;

    UpdatePageTableForVMA(area);
    return MergeAdjacent(vma_handle);
}
/// Carves an area of exactly `size` bytes at `target`, records `offset` in it and marks it as
/// allocated.
MemoryManager::VMAHandle MemoryManager::AllocateMemory(GPUVAddr target, std::size_t offset,
                                                       u64 size) {
    // CarveVMA() yields the appropriately sized area that becomes the allocation.
    const VMAIter carved{CarveVMA(target, size)};
    ASSERT(carved->second.size == size);

    carved->second.offset = offset;
    return Allocate(carved);
}
/// Carves an area of exactly `size` bytes at `target` and maps it to the host pointer `memory`,
/// recording `backing_addr` as its CPU address.
/// @returns the iterator produced by MergeAdjacent().
MemoryManager::VMAHandle MemoryManager::MapBackingMemory(GPUVAddr target, u8* memory, u64 size,
                                                         VAddr backing_addr) {
    // CarveVMA() yields the appropriately sized area that becomes the mapping.
    const VMAIter carved{CarveVMA(target, size)};
    auto& area = carved->second;
    ASSERT(area.size == size);

    area.backing_addr = backing_addr;
    area.backing_memory = memory;
    area.type = VirtualMemoryArea::Type::Mapped;

    UpdatePageTableForVMA(area);
    return MergeAdjacent(carved);
}
/// Returns every area inside [target, target + size) to the allocated state, splitting the
/// areas at the edges of the range first.
void MemoryManager::UnmapRange(GPUVAddr target, u64 size) {
    VMAIter cursor{CarveVMARange(target, size)};
    const VAddr range_end{target + size};
    const VMAIter map_end{vma_map.end()};

    // Allocate() may merge areas and invalidate iterators, so the loop is bounded by comparing
    // addresses rather than by a precomputed end-of-range iterator.
    while (cursor != map_end && cursor->second.base < range_end) {
        // Unmapped ranges return to allocated state and can be reused
        // This behavior is used by Super Mario Odyssey, Sonic Forces, and likely other games
        const VMAIter reclaimed{Allocate(cursor)};
        cursor = std::next(reclaimed);
    }

    ASSERT(FindVMA(target)->second.size >= size);
}
/// Converts a const_iterator into the equivalent mutable iterator of `vma_map`.
MemoryManager::VMAIter MemoryManager::StripIterConstness(const VMAHandle& iter) {
    // Erasing the empty range [iter, iter) removes nothing, but the container hands back a
    // regular iterator to the same position - which is all that is wanted here.
    const VMAIter mutable_iter{vma_map.erase(iter, iter)};
    return mutable_iter;
}
/// Carves an area covering exactly [base, base + size) out of the area that contains `base`.
/// Returns a value-initialised iterator when `base` is outside the managed range or the
/// containing area is already mapped.
MemoryManager::VMAIter MemoryManager::CarveVMA(GPUVAddr base, u64 size) {
    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
    ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: 0x{:016X}", base);

    VMAIter carved{StripIterConstness(FindVMA(base))};

    // Reject addresses outside the managed range and regions that are already mapped. The
    // short-circuit keeps end() from being dereferenced.
    if (carved == vma_map.end() || carved->second.type == VirtualMemoryArea::Type::Mapped) {
        return {};
    }

    const u64 containing_size{carved->second.size};
    const VAddr split_begin{base - carved->second.base};
    const VAddr split_end{split_begin + size};
    ASSERT_MSG(split_end <= containing_size,
               "region size 0x{:016X} is less than required size 0x{:016X}", containing_size,
               split_end);

    // Cut off the tail first so the head split below still operates on the original iterator.
    if (split_end < containing_size) {
        SplitVMA(carved, split_end);
    }

    // Cut off the head; the right-hand side of that split is the carved area.
    if (split_begin != 0) {
        carved = SplitVMA(carved, split_begin);
    }

    return carved;
}
/// Splits the areas at both edges of [target, target + size) so that an area boundary exists at
/// each end of the range.
/// @returns the first area of the range, or a value-initialised iterator when any area in the
///          range is unmapped.
MemoryManager::VMAIter MemoryManager::CarveVMARange(GPUVAddr target, u64 size) {
    ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size);
    ASSERT_MSG((target & page_mask) == 0, "non-page aligned base: 0x{:016X}", target);

    const VAddr range_end{target + size};
    ASSERT(range_end >= target);
    ASSERT(size > 0);

    VMAIter first{StripIterConstness(FindVMA(target))};
    const VMAIter scan_stop{vma_map.lower_bound(range_end)};

    // Refuse to carve a range that contains any unmapped area.
    for (VMAIter it{first}; it != scan_stop; ++it) {
        if (it->second.type == VirtualMemoryArea::Type::Unmapped) {
            return {};
        }
    }

    // Make sure an area starts exactly at the beginning of the range.
    if (const GPUVAddr first_base{first->second.base}; target != first_base) {
        first = SplitVMA(first, target - first_base);
    }

    // Make sure an area boundary exists exactly at the end of the range.
    const VMAIter last{StripIterConstness(FindVMA(range_end))};
    if (last != vma_map.end() && range_end != last->second.base) {
        SplitVMA(last, range_end - last->second.base);
    }

    return first;
}
/// Splits the area referenced by `vma_handle` in two at `offset_in_vma` bytes from its base.
/// The original entry shrinks to become the left half; a new entry is inserted for the right
/// half, with its base and type-specific backing information advanced by the same offset.
/// @param vma_handle    Area to split.
/// @param offset_in_vma Split point relative to the area's base; must be strictly between 0
///                      and the area's size.
/// @returns an iterator to the newly inserted right half.
MemoryManager::VMAIter MemoryManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) {
    VirtualMemoryArea& old_vma{vma_handle->second};
    VirtualMemoryArea new_vma{old_vma}; // Make a copy of the VMA

    // For now, don't allow no-op VMA splits (trying to split at a boundary) because it's probably
    // a bug. This restriction might be removed later.
    ASSERT(offset_in_vma < old_vma.size);
    ASSERT(offset_in_vma > 0);

    old_vma.size = offset_in_vma;
    new_vma.base += offset_in_vma;
    new_vma.size -= offset_in_vma;

    switch (new_vma.type) {
    case VirtualMemoryArea::Type::Unmapped:
        break;
    case VirtualMemoryArea::Type::Allocated:
        new_vma.offset += offset_in_vma;
        break;
    case VirtualMemoryArea::Type::Mapped:
        new_vma.backing_memory += offset_in_vma;
        // Keep the CPU address in step with the host pointer; otherwise the right half would
        // still claim the CPU address of the start of the original area.
        new_vma.backing_addr += offset_in_vma;
        break;
    }

    ASSERT(old_vma.CanBeMergedWith(new_vma));

    // The right half sorts immediately after the left half, so hint the insertion there.
    return vma_map.emplace_hint(std::next(vma_handle), new_vma.base, new_vma);
}
/// Merges the area at `iter` with its right-hand and then its left-hand neighbour whenever
/// CanBeMergedWith() allows it.
/// @returns an iterator to the area that now covers the original one: the left neighbour if the
///          area was merged into it, otherwise `iter` itself.
MemoryManager::VMAIter MemoryManager::MergeAdjacent(VMAIter iter) {
// Right-hand merge: absorb the next area into this one and erase the absorbed entry.
const VMAIter next_vma{std::next(iter)};
if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) {
iter->second.size += next_vma->second.size;
vma_map.erase(next_vma);
}
// Left-hand merge: only possible when a previous entry exists.
if (iter != vma_map.begin()) {
VMAIter prev_vma{std::prev(iter)};
if (prev_vma->second.CanBeMergedWith(iter->second)) {
prev_vma->second.size += iter->second.size;
vma_map.erase(iter);
// `iter` was just erased; continue with the surviving left neighbour.
iter = prev_vma;
}
}
return iter;
}
/// Brings the page table entries covered by `vma` in line with the area's type: unmapped areas
/// are unmapped, allocated areas are mapped without a host pointer, and mapped areas are mapped
/// to their backing memory.
void MemoryManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
switch (vma.type) {
case VirtualMemoryArea::Type::Unmapped:
UnmapRegion(vma.base, vma.size);
break;
case VirtualMemoryArea::Type::Allocated:
// Allocated areas have no host memory yet, hence the null target pointer.
MapMemoryRegion(vma.base, vma.size, nullptr, vma.backing_addr);
break;
case VirtualMemoryArea::Type::Mapped:
MapMemoryRegion(vma.base, vma.size, vma.backing_memory, vma.backing_addr);
break;
}
// NOTE(review): `block` and `gpu_addr` are not declared in this function and the function
// returns void; this line looks like residue from the removed PageSlot() in the diff view -
// confirm and drop.
return (*block)[(gpu_addr >> PAGE_BITS) & PAGE_BLOCK_MASK];
}
} // namespace Tegra

View File

@@ -1,67 +1,148 @@
// Copyright 2018 yuzu emulator team
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <memory>
#include <map>
#include <optional>
#include <vector>
#include "common/common_types.h"
#include "common/page_table.h"
namespace Tegra {
/// Virtual addresses in the GPU's memory map are 64 bit.
using GPUVAddr = u64;
/**
* Represents a VMA in an address space. A VMA is a contiguous region of virtual addressing space
* with homogeneous attributes across its extents. In this particular implementation each VMA is
* also backed by a single host memory allocation.
*/
struct VirtualMemoryArea {
/// Kind of mapping an area currently has.
enum class Type : u8 {
/// The pages of the area are unmapped in the page table.
Unmapped,
/// The pages of the area are mapped, but without a host pointer.
Allocated,
/// The pages of the area are mapped to the host memory at `backing_memory`.
Mapped,
};
/// Virtual base address of the region.
GPUVAddr base{};
/// Size of the region.
u64 size{};
/// Memory area mapping type.
Type type{Type::Unmapped};
/// CPU memory mapped address corresponding to this memory area.
VAddr backing_addr{};
/// Offset into the backing_memory the mapping starts from.
/// Only compared and advanced for Allocated areas by the merge/split logic.
std::size_t offset{};
/// Pointer backing this VMA.
u8* backing_memory{};
/// Tests if this area can be merged to the right with `next`.
/// `next` must start exactly where this area ends.
bool CanBeMergedWith(const VirtualMemoryArea& next) const;
};
class MemoryManager final {
public:
MemoryManager();
GPUVAddr AllocateSpace(u64 size, u64 align);
GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align);
GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size);
GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size);
GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size);
GPUVAddr GetRegionEnd(GPUVAddr region_start) const;
std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const;
GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr addr, u64 size);
GPUVAddr UnmapBuffer(GPUVAddr addr, u64 size);
std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr);
static constexpr u64 PAGE_BITS = 16;
static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS;
static constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
template <typename T>
T Read(GPUVAddr addr);
template <typename T>
void Write(GPUVAddr addr, T data);
u8* GetPointer(GPUVAddr addr);
void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size);
void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
private:
enum class PageStatus : u64 {
Unmapped = 0xFFFFFFFFFFFFFFFFULL,
Allocated = 0xFFFFFFFFFFFFFFFEULL,
Reserved = 0xFFFFFFFFFFFFFFFDULL,
};
using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
using VMAHandle = VMAMap::const_iterator;
using VMAIter = VMAMap::iterator;
std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
PageStatus status);
VAddr& PageSlot(GPUVAddr gpu_addr);
bool IsAddressValid(GPUVAddr addr) const;
void MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
VAddr backing_addr = 0);
void MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr);
void UnmapRegion(GPUVAddr base, u64 size);
static constexpr u64 MAX_ADDRESS{0x10000000000ULL};
static constexpr u64 PAGE_TABLE_BITS{10};
static constexpr u64 PAGE_TABLE_SIZE{1 << PAGE_TABLE_BITS};
static constexpr u64 PAGE_TABLE_MASK{PAGE_TABLE_SIZE - 1};
static constexpr u64 PAGE_BLOCK_BITS{14};
static constexpr u64 PAGE_BLOCK_SIZE{1 << PAGE_BLOCK_BITS};
static constexpr u64 PAGE_BLOCK_MASK{PAGE_BLOCK_SIZE - 1};
/// Finds the VMA that contains the given address, or returns `vma_map.end()` if there is none.
VMAHandle FindVMA(GPUVAddr target) const;
using PageBlock = std::array<VAddr, PAGE_BLOCK_SIZE>;
std::array<std::unique_ptr<PageBlock>, PAGE_TABLE_SIZE> page_table{};
VMAHandle AllocateMemory(GPUVAddr target, std::size_t offset, u64 size);
struct MappedRegion {
VAddr cpu_addr;
GPUVAddr gpu_addr;
u64 size;
};
/**
* Maps an unmanaged host memory pointer at a given address.
*
* @param target The guest address to start the mapping at.
* @param memory The memory to be mapped.
* @param size Size of the mapping.
* @param backing_addr CPU memory mapped address corresponding to the start of the mapping.
*/
VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr);
std::vector<MappedRegion> mapped_regions;
/// Unmaps a range of addresses, splitting VMAs as necessary.
void UnmapRange(GPUVAddr target, u64 size);
/// Converts a VMAHandle to a mutable VMAIter.
VMAIter StripIterConstness(const VMAHandle& iter);
/// Marks the specified VMA as allocated.
VMAIter Allocate(VMAIter vma);
/**
* Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing
* the appropriate error checking.
*/
VMAIter CarveVMA(GPUVAddr base, u64 size);
/**
* Splits the edges of the given range of non-Free VMAs so that there is a VMA split at each
* end of the range.
*/
VMAIter CarveVMARange(GPUVAddr base, u64 size);
/**
* Splits a VMA in two, at the specified offset.
* @returns the right side of the split, with the original iterator becoming the left side.
*/
VMAIter SplitVMA(VMAIter vma, u64 offset_in_vma);
/**
* Checks for and merges the specified VMA with adjacent ones if possible.
* @returns the merged VMA or the original if no merging was possible.
*/
VMAIter MergeAdjacent(VMAIter vma);
/// Updates the pages corresponding to this VMA so they match the VMA's attributes.
void UpdatePageTableForVMA(const VirtualMemoryArea& vma);
/// Finds a free (unmapped) region of the specified size, starting at the specified address.
GPUVAddr FindFreeRegion(GPUVAddr region_start, u64 size);
private:
static constexpr u64 page_bits{16};
static constexpr u64 page_size{1 << page_bits};
static constexpr u64 page_mask{page_size - 1};
/// Address space in bits, this is fairly arbitrary but sufficiently large.
static constexpr u32 address_space_width{39};
/// Start address for mapping, this is fairly arbitrary but must be non-zero.
static constexpr GPUVAddr address_space_base{0x100000};
/// End of address space, based on address space in bits.
static constexpr GPUVAddr address_space_end{1ULL << address_space_width};
Common::PageTable page_table{page_bits};
VMAMap vma_map;
};
} // namespace Tegra

View File

@@ -6,7 +6,6 @@
#include <cstring>
#include "common/assert.h"
#include "common/common_types.h"
#include "core/memory.h"
#include "video_core/morton.h"
#include "video_core/surface.h"
#include "video_core/textures/decoders.h"
@@ -16,12 +15,12 @@ namespace VideoCore {
using Surface::GetBytesPerPixel;
using Surface::PixelFormat;
using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, std::size_t, VAddr);
using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*);
using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
template <bool morton_to_linear, PixelFormat format>
static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
u32 tile_width_spacing, u8* buffer, std::size_t buffer_size, VAddr addr) {
u32 tile_width_spacing, u8* buffer, u8* addr) {
constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
// With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
@@ -34,150 +33,146 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
stride, height, depth, block_height, block_depth,
tile_width_spacing);
} else {
Tegra::Texture::CopySwizzledData(
(stride + tile_size_x - 1) / tile_size_x, (height + tile_size_y - 1) / tile_size_y,
depth, bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), buffer, false,
block_height, block_depth, tile_width_spacing);
Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
(height + tile_size_y - 1) / tile_size_y, depth,
bytes_per_pixel, bytes_per_pixel, addr, buffer, false,
block_height, block_depth, tile_width_spacing);
}
}
static constexpr ConversionArray morton_to_linear_fns = {
// clang-format off
MortonCopy<true, PixelFormat::ABGR8U>,
MortonCopy<true, PixelFormat::ABGR8S>,
MortonCopy<true, PixelFormat::ABGR8UI>,
MortonCopy<true, PixelFormat::B5G6R5U>,
MortonCopy<true, PixelFormat::A2B10G10R10U>,
MortonCopy<true, PixelFormat::A1B5G5R5U>,
MortonCopy<true, PixelFormat::R8U>,
MortonCopy<true, PixelFormat::R8UI>,
MortonCopy<true, PixelFormat::RGBA16F>,
MortonCopy<true, PixelFormat::RGBA16U>,
MortonCopy<true, PixelFormat::RGBA16UI>,
MortonCopy<true, PixelFormat::R11FG11FB10F>,
MortonCopy<true, PixelFormat::RGBA32UI>,
MortonCopy<true, PixelFormat::DXT1>,
MortonCopy<true, PixelFormat::DXT23>,
MortonCopy<true, PixelFormat::DXT45>,
MortonCopy<true, PixelFormat::DXN1>,
MortonCopy<true, PixelFormat::DXN2UNORM>,
MortonCopy<true, PixelFormat::DXN2SNORM>,
MortonCopy<true, PixelFormat::BC7U>,
MortonCopy<true, PixelFormat::BC6H_UF16>,
MortonCopy<true, PixelFormat::BC6H_SF16>,
MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
MortonCopy<true, PixelFormat::BGRA8>,
MortonCopy<true, PixelFormat::RGBA32F>,
MortonCopy<true, PixelFormat::RG32F>,
MortonCopy<true, PixelFormat::R32F>,
MortonCopy<true, PixelFormat::R16F>,
MortonCopy<true, PixelFormat::R16U>,
MortonCopy<true, PixelFormat::R16S>,
MortonCopy<true, PixelFormat::R16UI>,
MortonCopy<true, PixelFormat::R16I>,
MortonCopy<true, PixelFormat::RG16>,
MortonCopy<true, PixelFormat::RG16F>,
MortonCopy<true, PixelFormat::RG16UI>,
MortonCopy<true, PixelFormat::RG16I>,
MortonCopy<true, PixelFormat::RG16S>,
MortonCopy<true, PixelFormat::RGB32F>,
MortonCopy<true, PixelFormat::RGBA8_SRGB>,
MortonCopy<true, PixelFormat::RG8U>,
MortonCopy<true, PixelFormat::RG8S>,
MortonCopy<true, PixelFormat::RG32UI>,
MortonCopy<true, PixelFormat::R32UI>,
MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
MortonCopy<true, PixelFormat::BGRA8_SRGB>,
MortonCopy<true, PixelFormat::DXT1_SRGB>,
MortonCopy<true, PixelFormat::DXT23_SRGB>,
MortonCopy<true, PixelFormat::DXT45_SRGB>,
MortonCopy<true, PixelFormat::BC7U_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
MortonCopy<true, PixelFormat::Z32F>,
MortonCopy<true, PixelFormat::Z16>,
MortonCopy<true, PixelFormat::Z24S8>,
MortonCopy<true, PixelFormat::S8Z24>,
MortonCopy<true, PixelFormat::Z32FS8>,
// clang-format on
MortonCopy<true, PixelFormat::ABGR8U>,
MortonCopy<true, PixelFormat::ABGR8S>,
MortonCopy<true, PixelFormat::ABGR8UI>,
MortonCopy<true, PixelFormat::B5G6R5U>,
MortonCopy<true, PixelFormat::A2B10G10R10U>,
MortonCopy<true, PixelFormat::A1B5G5R5U>,
MortonCopy<true, PixelFormat::R8U>,
MortonCopy<true, PixelFormat::R8UI>,
MortonCopy<true, PixelFormat::RGBA16F>,
MortonCopy<true, PixelFormat::RGBA16U>,
MortonCopy<true, PixelFormat::RGBA16UI>,
MortonCopy<true, PixelFormat::R11FG11FB10F>,
MortonCopy<true, PixelFormat::RGBA32UI>,
MortonCopy<true, PixelFormat::DXT1>,
MortonCopy<true, PixelFormat::DXT23>,
MortonCopy<true, PixelFormat::DXT45>,
MortonCopy<true, PixelFormat::DXN1>,
MortonCopy<true, PixelFormat::DXN2UNORM>,
MortonCopy<true, PixelFormat::DXN2SNORM>,
MortonCopy<true, PixelFormat::BC7U>,
MortonCopy<true, PixelFormat::BC6H_UF16>,
MortonCopy<true, PixelFormat::BC6H_SF16>,
MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
MortonCopy<true, PixelFormat::BGRA8>,
MortonCopy<true, PixelFormat::RGBA32F>,
MortonCopy<true, PixelFormat::RG32F>,
MortonCopy<true, PixelFormat::R32F>,
MortonCopy<true, PixelFormat::R16F>,
MortonCopy<true, PixelFormat::R16U>,
MortonCopy<true, PixelFormat::R16S>,
MortonCopy<true, PixelFormat::R16UI>,
MortonCopy<true, PixelFormat::R16I>,
MortonCopy<true, PixelFormat::RG16>,
MortonCopy<true, PixelFormat::RG16F>,
MortonCopy<true, PixelFormat::RG16UI>,
MortonCopy<true, PixelFormat::RG16I>,
MortonCopy<true, PixelFormat::RG16S>,
MortonCopy<true, PixelFormat::RGB32F>,
MortonCopy<true, PixelFormat::RGBA8_SRGB>,
MortonCopy<true, PixelFormat::RG8U>,
MortonCopy<true, PixelFormat::RG8S>,
MortonCopy<true, PixelFormat::RG32UI>,
MortonCopy<true, PixelFormat::R32UI>,
MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
MortonCopy<true, PixelFormat::BGRA8_SRGB>,
MortonCopy<true, PixelFormat::DXT1_SRGB>,
MortonCopy<true, PixelFormat::DXT23_SRGB>,
MortonCopy<true, PixelFormat::DXT45_SRGB>,
MortonCopy<true, PixelFormat::BC7U_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
MortonCopy<true, PixelFormat::Z32F>,
MortonCopy<true, PixelFormat::Z16>,
MortonCopy<true, PixelFormat::Z24S8>,
MortonCopy<true, PixelFormat::S8Z24>,
MortonCopy<true, PixelFormat::Z32FS8>,
};
static constexpr ConversionArray linear_to_morton_fns = {
// clang-format off
MortonCopy<false, PixelFormat::ABGR8U>,
MortonCopy<false, PixelFormat::ABGR8S>,
MortonCopy<false, PixelFormat::ABGR8UI>,
MortonCopy<false, PixelFormat::B5G6R5U>,
MortonCopy<false, PixelFormat::A2B10G10R10U>,
MortonCopy<false, PixelFormat::A1B5G5R5U>,
MortonCopy<false, PixelFormat::R8U>,
MortonCopy<false, PixelFormat::R8UI>,
MortonCopy<false, PixelFormat::RGBA16F>,
MortonCopy<false, PixelFormat::RGBA16U>,
MortonCopy<false, PixelFormat::RGBA16UI>,
MortonCopy<false, PixelFormat::R11FG11FB10F>,
MortonCopy<false, PixelFormat::RGBA32UI>,
MortonCopy<false, PixelFormat::DXT1>,
MortonCopy<false, PixelFormat::DXT23>,
MortonCopy<false, PixelFormat::DXT45>,
MortonCopy<false, PixelFormat::DXN1>,
MortonCopy<false, PixelFormat::DXN2UNORM>,
MortonCopy<false, PixelFormat::DXN2SNORM>,
MortonCopy<false, PixelFormat::BC7U>,
MortonCopy<false, PixelFormat::BC6H_UF16>,
MortonCopy<false, PixelFormat::BC6H_SF16>,
// TODO(Subv): Swizzling ASTC formats are not supported
nullptr,
MortonCopy<false, PixelFormat::BGRA8>,
MortonCopy<false, PixelFormat::RGBA32F>,
MortonCopy<false, PixelFormat::RG32F>,
MortonCopy<false, PixelFormat::R32F>,
MortonCopy<false, PixelFormat::R16F>,
MortonCopy<false, PixelFormat::R16U>,
MortonCopy<false, PixelFormat::R16S>,
MortonCopy<false, PixelFormat::R16UI>,
MortonCopy<false, PixelFormat::R16I>,
MortonCopy<false, PixelFormat::RG16>,
MortonCopy<false, PixelFormat::RG16F>,
MortonCopy<false, PixelFormat::RG16UI>,
MortonCopy<false, PixelFormat::RG16I>,
MortonCopy<false, PixelFormat::RG16S>,
MortonCopy<false, PixelFormat::RGB32F>,
MortonCopy<false, PixelFormat::RGBA8_SRGB>,
MortonCopy<false, PixelFormat::RG8U>,
MortonCopy<false, PixelFormat::RG8S>,
MortonCopy<false, PixelFormat::RG32UI>,
MortonCopy<false, PixelFormat::R32UI>,
nullptr,
nullptr,
nullptr,
MortonCopy<false, PixelFormat::BGRA8_SRGB>,
MortonCopy<false, PixelFormat::DXT1_SRGB>,
MortonCopy<false, PixelFormat::DXT23_SRGB>,
MortonCopy<false, PixelFormat::DXT45_SRGB>,
MortonCopy<false, PixelFormat::BC7U_SRGB>,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
MortonCopy<false, PixelFormat::Z32F>,
MortonCopy<false, PixelFormat::Z16>,
MortonCopy<false, PixelFormat::Z24S8>,
MortonCopy<false, PixelFormat::S8Z24>,
MortonCopy<false, PixelFormat::Z32FS8>,
// clang-format on
MortonCopy<false, PixelFormat::ABGR8U>,
MortonCopy<false, PixelFormat::ABGR8S>,
MortonCopy<false, PixelFormat::ABGR8UI>,
MortonCopy<false, PixelFormat::B5G6R5U>,
MortonCopy<false, PixelFormat::A2B10G10R10U>,
MortonCopy<false, PixelFormat::A1B5G5R5U>,
MortonCopy<false, PixelFormat::R8U>,
MortonCopy<false, PixelFormat::R8UI>,
MortonCopy<false, PixelFormat::RGBA16F>,
MortonCopy<false, PixelFormat::RGBA16U>,
MortonCopy<false, PixelFormat::RGBA16UI>,
MortonCopy<false, PixelFormat::R11FG11FB10F>,
MortonCopy<false, PixelFormat::RGBA32UI>,
MortonCopy<false, PixelFormat::DXT1>,
MortonCopy<false, PixelFormat::DXT23>,
MortonCopy<false, PixelFormat::DXT45>,
MortonCopy<false, PixelFormat::DXN1>,
MortonCopy<false, PixelFormat::DXN2UNORM>,
MortonCopy<false, PixelFormat::DXN2SNORM>,
MortonCopy<false, PixelFormat::BC7U>,
MortonCopy<false, PixelFormat::BC6H_UF16>,
MortonCopy<false, PixelFormat::BC6H_SF16>,
// TODO(Subv): Swizzling ASTC formats are not supported
nullptr,
MortonCopy<false, PixelFormat::BGRA8>,
MortonCopy<false, PixelFormat::RGBA32F>,
MortonCopy<false, PixelFormat::RG32F>,
MortonCopy<false, PixelFormat::R32F>,
MortonCopy<false, PixelFormat::R16F>,
MortonCopy<false, PixelFormat::R16U>,
MortonCopy<false, PixelFormat::R16S>,
MortonCopy<false, PixelFormat::R16UI>,
MortonCopy<false, PixelFormat::R16I>,
MortonCopy<false, PixelFormat::RG16>,
MortonCopy<false, PixelFormat::RG16F>,
MortonCopy<false, PixelFormat::RG16UI>,
MortonCopy<false, PixelFormat::RG16I>,
MortonCopy<false, PixelFormat::RG16S>,
MortonCopy<false, PixelFormat::RGB32F>,
MortonCopy<false, PixelFormat::RGBA8_SRGB>,
MortonCopy<false, PixelFormat::RG8U>,
MortonCopy<false, PixelFormat::RG8S>,
MortonCopy<false, PixelFormat::RG32UI>,
MortonCopy<false, PixelFormat::R32UI>,
nullptr,
nullptr,
nullptr,
MortonCopy<false, PixelFormat::BGRA8_SRGB>,
MortonCopy<false, PixelFormat::DXT1_SRGB>,
MortonCopy<false, PixelFormat::DXT23_SRGB>,
MortonCopy<false, PixelFormat::DXT45_SRGB>,
MortonCopy<false, PixelFormat::BC7U_SRGB>,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
MortonCopy<false, PixelFormat::Z32F>,
MortonCopy<false, PixelFormat::Z16>,
MortonCopy<false, PixelFormat::Z24S8>,
MortonCopy<false, PixelFormat::S8Z24>,
MortonCopy<false, PixelFormat::Z32FS8>,
};
static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
@@ -191,45 +186,6 @@ static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFor
return morton_to_linear_fns[static_cast<std::size_t>(format)];
}
/// 8x8 Z-Order coordinate from 2D coordinates
/// Only the low three bits of each coordinate take part; x lands on the even bit positions of
/// the result and y on the odd ones.
static u32 MortonInterleave(u32 x, u32 y) {
    // Spread bits 0..2 of a value out to bit positions 0, 2 and 4.
    const auto spread_bits = [](u32 value) -> u32 {
        return (value & 1u) | ((value & 2u) << 1) | ((value & 4u) << 2);
    };
    return spread_bits(x) + (spread_bits(y) << 1);
}
/// Calculates the offset of the position of the pixel in Morton order
static u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
    // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
    // of which is composed of four 2x2 subtiles each of which is composed of four texels.
    // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
    // texels are laid out in a 2x2 subtile like this:
    // 2 3
    // 0 1
    //
    // The full 8x8 tile has the texels arranged like this:
    //
    // 42 43 46 47 58 59 62 63
    // 40 41 44 45 56 57 60 61
    // 34 35 38 39 50 51 54 55
    // 32 33 36 37 48 49 52 53
    // 10 11 14 15 26 27 30 31
    // 08 09 12 13 24 25 28 29
    // 02 03 06 07 18 19 22 23
    // 00 01 04 05 16 17 20 21
    //
    // This pattern is what's called Z-order curve, or Morton order.
    constexpr u32 tile_height = 8;

    // x rounded down to a multiple of 8, times the tile height, gives the index of the first
    // texel of the tile column; the interleaved index is the position inside the tile.
    const u32 tile_origin_x = x & ~u32{7};
    const u32 texel_index = MortonInterleave(x, y) + tile_origin_x * tile_height;
    return texel_index * bytes_per_pixel;
}
static u32 MortonInterleave128(u32 x, u32 y) {
// 128x128 Z-Order coordinate from 2D coordinates
static constexpr u32 xlut[] = {
@@ -325,14 +281,14 @@ static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
u8* buffer, std::size_t buffer_size, VAddr addr) {
u8* buffer, u8* addr) {
GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth,
tile_width_spacing, buffer, buffer_size, addr);
tile_width_spacing, buffer, addr);
}
void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
u8* morton_data, u8* linear_data, bool morton_to_linear) {
void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data) {
const bool morton_to_linear = mode == MortonSwizzleMode::MortonToLinear;
u8* data_ptrs[2];
for (u32 y = 0; y < height; ++y) {
for (u32 x = 0; x < width; ++x) {

View File

@@ -13,9 +13,9 @@ enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
u8* buffer, std::size_t buffer_size, VAddr addr);
u8* buffer, u8* addr);
void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
u8* morton_data, u8* linear_data, bool morton_to_linear);
void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data);
} // namespace VideoCore

View File

@@ -4,6 +4,7 @@
#pragma once
#include <mutex>
#include <set>
#include <unordered_map>
@@ -12,14 +13,26 @@
#include "common/common_types.h"
#include "core/settings.h"
#include "video_core/gpu.h"
#include "video_core/rasterizer_interface.h"
class RasterizerCacheObject {
public:
explicit RasterizerCacheObject(const u8* host_ptr)
: host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
virtual ~RasterizerCacheObject();
CacheAddr GetCacheAddr() const {
return cache_addr;
}
const u8* GetHostPtr() const {
return host_ptr;
}
/// Gets the address of the shader in guest memory, required for cache management
virtual VAddr GetAddr() const = 0;
virtual VAddr GetCpuAddr() const = 0;
/// Gets the size of the shader in guest memory, required for cache management
virtual std::size_t GetSizeInBytes() const = 0;
@@ -58,6 +71,8 @@ private:
bool is_registered{}; ///< Whether the object is currently registered with the cache
bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space
const u8* host_ptr{}; ///< Pointer to the memory backing this cached region
};
template <class T>
@@ -68,7 +83,9 @@ public:
explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
/// Write any cached resources overlapping the specified region back to memory
void FlushRegion(Tegra::GPUVAddr addr, size_t size) {
void FlushRegion(CacheAddr addr, std::size_t size) {
std::lock_guard<std::recursive_mutex> lock{mutex};
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
for (auto& object : objects) {
FlushObject(object);
@@ -76,7 +93,9 @@ public:
}
/// Mark the specified region as being invalidated
void InvalidateRegion(VAddr addr, u64 size) {
void InvalidateRegion(CacheAddr addr, u64 size) {
std::lock_guard<std::recursive_mutex> lock{mutex};
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
for (auto& object : objects) {
if (!object->IsRegistered()) {
@@ -89,48 +108,60 @@ public:
/// Invalidates everything in the cache by unregistering every object held in interval_cache
void InvalidateAll() {
// The mutex is recursive, so the nested lock taken inside Unregister() is fine.
std::lock_guard<std::recursive_mutex> lock{mutex};
// Unregister() subtracts the object from interval_cache, so begin() is re-read on every
// iteration and the loop ends once the map has been drained.
while (interval_cache.begin() != interval_cache.end()) {
Unregister(*interval_cache.begin()->second.begin());
}
}
protected:
/// Tries to get an object from the cache with the specified address
T TryGet(VAddr addr) const {
/// Looks up the object registered under the given cache address; yields nullptr when no object
/// is registered there
T TryGet(CacheAddr addr) const {
    const auto entry = map_cache.find(addr);
    if (entry == map_cache.end()) {
        return nullptr;
    }
    return entry->second;
}
/// Looks up the object registered for the given host pointer (converted with ToCacheAddr);
/// yields nullptr when no object is registered there
T TryGet(const void* addr) const {
    const auto entry = map_cache.find(ToCacheAddr(addr));
    if (entry == map_cache.end()) {
        return nullptr;
    }
    return entry->second;
}
/// Register an object into the cache
void Register(const T& object) {
virtual void Register(const T& object) {
std::lock_guard<std::recursive_mutex> lock{mutex};
object->SetIsRegistered(true);
interval_cache.add({GetInterval(object), ObjectSet{object}});
map_cache.insert({object->GetAddr(), object});
rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1);
map_cache.insert({object->GetCacheAddr(), object});
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
}
/// Unregisters an object from the cache
void Unregister(const T& object) {
object->SetIsRegistered(false);
rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
// Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
if (Settings::values.use_accurate_gpu_emulation) {
FlushObject(object);
}
virtual void Unregister(const T& object) {
std::lock_guard<std::recursive_mutex> lock{mutex};
object->SetIsRegistered(false);
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
interval_cache.subtract({GetInterval(object), ObjectSet{object}});
map_cache.erase(object->GetAddr());
map_cache.erase(object->GetCacheAddr());
}
/// Advances the cache-wide modification counter under the cache lock and returns the new value,
/// which is used for tracking when cached objects were last modified
u64 GetModifiedTicks() {
    std::lock_guard<std::recursive_mutex> guard{mutex};
    modified_ticks += 1;
    return modified_ticks;
}
/// Flushes the specified object, updating appropriate cache state as needed
void FlushObject(const T& object) {
std::lock_guard<std::recursive_mutex> lock{mutex};
if (!object->IsDirty()) {
return;
}
@@ -140,7 +171,7 @@ protected:
private:
/// Returns a list of cached objects from the specified memory region, ordered by access time
std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) {
if (size == 0) {
return {};
}
@@ -164,17 +195,18 @@ private:
}
using ObjectSet = std::set<T>;
using ObjectCache = std::unordered_map<VAddr, T>;
using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
using ObjectCache = std::unordered_map<CacheAddr, T>;
using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>;
using ObjectInterval = typename IntervalCache::interval_type;
static auto GetInterval(const T& object) {
return ObjectInterval::right_open(object->GetAddr(),
object->GetAddr() + object->GetSizeInBytes());
return ObjectInterval::right_open(object->GetCacheAddr(),
object->GetCacheAddr() + object->GetSizeInBytes());
}
ObjectCache map_cache;
IntervalCache interval_cache; ///< Cache of objects
u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
VideoCore::RasterizerInterface& rasterizer;
std::recursive_mutex mutex;
};

View File

@@ -9,7 +9,6 @@
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace VideoCore {
@@ -35,14 +34,14 @@ public:
virtual void FlushAll() = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
virtual void FlushRegion(VAddr addr, u64 size) = 0;
virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be invalidated
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
/// and invalidated
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
/// Attempt to use a faster method to perform a surface copy
virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
@@ -63,7 +62,7 @@ public:
}
/// Increase/decrease the number of objects in pages touching the specified region
virtual void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {}
virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
/// Initialize disk cached resources for the game being emulated
virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,

View File

@@ -13,24 +13,28 @@
namespace OpenGL {
/// Constructs a cached buffer entry.
/// @param cpu_addr  Guest CPU address of the cached data, reported by GetCpuAddr()
/// @param size      Size of the cached data in bytes
/// @param offset    Offset of the uploaded data inside the host buffer
/// @param alignment Alignment the data was uploaded with
/// @param host_ptr  Host memory backing the region; forwarded to RasterizerCacheObject
///
/// The base class is listed first so the initializer list matches the order in which
/// initialization actually happens (base, then members in declaration order).
CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
                                     std::size_t alignment, u8* host_ptr)
    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
      alignment{alignment} {}
/// Creates the buffer cache: forwards the rasterizer to the RasterizerCache base and constructs
/// the stream buffer with the requested size.
/// NOTE(review): the meaning of the `true` argument passed to stream_buffer is not visible
/// here - confirm against the stream buffer's constructor.
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
: RasterizerCache{rasterizer}, stream_buffer(size, true) {}
GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
std::size_t alignment, bool cache) {
GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment,
bool cache) {
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
ASSERT_MSG(cpu_addr, "Invalid GPU address");
// Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games.
cache &= size >= 2048;
const auto& host_ptr{memory_manager.GetPointer(gpu_addr)};
if (cache) {
auto entry = TryGet(*cpu_addr);
auto entry = TryGet(host_ptr);
if (entry) {
if (entry->size >= size && entry->alignment == alignment) {
return entry->offset;
if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
return entry->GetOffset();
}
Unregister(entry);
}
@@ -39,17 +43,17 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
AlignBuffer(alignment);
const GLintptr uploaded_offset = buffer_offset;
Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
if (!host_ptr) {
return uploaded_offset;
}
std::memcpy(buffer_ptr, host_ptr, size);
buffer_ptr += size;
buffer_offset += size;
if (cache) {
auto entry = std::make_shared<CachedBufferEntry>();
entry->offset = uploaded_offset;
entry->size = size;
entry->alignment = alignment;
entry->addr = *cpu_addr;
auto entry = std::make_shared<CachedBufferEntry>(
*memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr);
Register(entry);
}

View File

@@ -17,22 +17,39 @@ namespace OpenGL {
class RasterizerOpenGL;
struct CachedBufferEntry final : public RasterizerCacheObject {
VAddr GetAddr() const override {
return addr;
class CachedBufferEntry final : public RasterizerCacheObject {
public:
explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
std::size_t alignment, u8* host_ptr);
VAddr GetCpuAddr() const override {
return cpu_addr;
}
/// Returns the size of the cached data in bytes, as required by the cache object interface
std::size_t GetSizeInBytes() const override {
return size;
}
/// Returns the size given at construction (the same value GetSizeInBytes() reports)
std::size_t GetSize() const {
return size;
}
/// Returns the host buffer offset given at construction
GLintptr GetOffset() const {
return offset;
}
/// Returns the alignment given at construction
std::size_t GetAlignment() const {
return alignment;
}
// We do not have to flush this cache as things in it are never modified by us, so this
// override does nothing.
void Flush() override {}
VAddr addr;
std::size_t size;
GLintptr offset;
std::size_t alignment;
private:
VAddr cpu_addr{};
std::size_t size{};
GLintptr offset{};
std::size_t alignment{};
};
class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
@@ -41,7 +58,7 @@ public:
/// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
/// allocated.
GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
bool cache = true);
/// Uploads from a host memory. Returns host's buffer offset where it's been allocated.

View File

@@ -7,7 +7,6 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -15,12 +14,13 @@
namespace OpenGL {
CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} {
CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr)
: cpu_addr{cpu_addr}, size{size}, RasterizerCacheObject{host_ptr} {
buffer.Create();
// Bind and unbind the buffer so it gets allocated by the driver
glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory");
LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
}
void CachedGlobalRegion::Reload(u32 size_) {
@@ -35,10 +35,10 @@ void CachedGlobalRegion::Reload(u32 size_) {
// TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW);
glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW);
}
GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
const auto search{reserve.find(addr)};
if (search == reserve.end()) {
return {};
@@ -46,11 +46,14 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32
return search->second;
}
GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) {
GlobalRegion region{TryGetReservedGlobalRegion(addr, size)};
GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size,
u8* host_ptr) {
GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
if (!region) {
// No reserved surface available, create a new one and reserve it
region = std::make_shared<CachedGlobalRegion>(addr, size);
auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr);
region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr);
ReserveGlobalRegion(region);
}
region->Reload(size);
@@ -58,7 +61,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 si
}
void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
reserve.insert_or_assign(region->GetAddr(), std::move(region));
reserve.insert_or_assign(region->GetCacheAddr(), std::move(region));
}
GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
@@ -69,22 +72,20 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
auto& gpu{Core::System::GetInstance().GPU()};
const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)];
const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress(
cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset());
ASSERT(cbuf_addr);
const auto actual_addr_gpu = Memory::Read64(*cbuf_addr);
const auto size = Memory::Read32(*cbuf_addr + 8);
const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu);
ASSERT(actual_addr);
auto& memory_manager{gpu.MemoryManager()};
const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]};
const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
global_region.GetCbufOffset()};
const auto actual_addr{memory_manager.Read<u64>(addr)};
const auto size{memory_manager.Read<u32>(addr + 8)};
// Look up global region in the cache based on address
GlobalRegion region = TryGet(*actual_addr);
const auto& host_ptr{memory_manager.GetPointer(actual_addr)};
GlobalRegion region{TryGet(host_ptr)};
if (!region) {
// No global region found - create a new one
region = GetUncachedGlobalRegion(*actual_addr, size);
region = GetUncachedGlobalRegion(actual_addr, size, host_ptr);
Register(region);
}

View File

@@ -27,14 +27,12 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
class CachedGlobalRegion final : public RasterizerCacheObject {
public:
explicit CachedGlobalRegion(VAddr addr, u32 size);
explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr);
/// Gets the address of the shader in guest memory, required for cache management
VAddr GetAddr() const override {
return addr;
VAddr GetCpuAddr() const override {
return cpu_addr;
}
/// Gets the size of the shader in guest memory, required for cache management
std::size_t GetSizeInBytes() const override {
return size;
}
@@ -53,9 +51,8 @@ public:
}
private:
VAddr addr{};
VAddr cpu_addr{};
u32 size{};
OGLBuffer buffer;
};
@@ -68,11 +65,11 @@ public:
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
private:
GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size);
GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr);
void ReserveGlobalRegion(GlobalRegion region);
std::unordered_map<VAddr, GlobalRegion> reserve;
std::unordered_map<CacheAddr, GlobalRegion> reserve;
};
} // namespace OpenGL

View File

@@ -40,16 +40,12 @@ GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) {
return index_offset;
}
GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size,
u32 count) {
GLintptr PrimitiveAssembler::MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count) {
const std::size_t map_size{CalculateQuadSize(count)};
auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
ASSERT_MSG(cpu_addr, "Invalid GPU address");
const u8* source{Memory::GetPointer(*cpu_addr)};
const u8* source{memory_manager.GetPointer(gpu_addr)};
for (u32 primitive = 0; primitive < count / 4; ++primitive) {
for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) {
@@ -64,4 +60,4 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
return index_offset;
}
} // namespace OpenGL
} // namespace OpenGL

View File

@@ -24,7 +24,7 @@ public:
GLintptr MakeQuadArray(u32 first, u32 count);
GLintptr MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, u32 count);
GLintptr MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count);
private:
OGLBufferCache& buffer_cache;

View File

@@ -225,8 +225,8 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
if (!vertex_array.IsEnabled())
continue;
const Tegra::GPUVAddr start = vertex_array.StartAddress();
const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
const GPUVAddr start = vertex_array.StartAddress();
const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
ASSERT(end > start);
const u64 size = end - start + 1;
@@ -421,8 +421,8 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
if (!regs.vertex_array[index].IsEnabled())
continue;
const Tegra::GPUVAddr start = regs.vertex_array[index].StartAddress();
const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
const GPUVAddr start = regs.vertex_array[index].StartAddress();
const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
ASSERT(end > start);
size += end - start + 1;
@@ -449,7 +449,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
return boost::make_iterator_range(map.equal_range(interval));
}
void RasterizerOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
const u64 page_start{addr >> Memory::PAGE_BITS};
const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
@@ -747,20 +747,26 @@ void RasterizerOpenGL::DrawArrays() {
/// Currently does nothing in the OpenGL rasterizer.
void RasterizerOpenGL::FlushAll() {}
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
/// Asks the surface cache to flush the given cache-address range. Requests with a null
/// address or a zero size are ignored.
void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
    const bool is_empty_request = addr == 0 || size == 0;
    if (is_empty_request) {
        return;
    }
    res_cache.FlushRegion(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
/// Invalidates the given cache-address range in the surface, shader, global region and buffer
/// caches, in that order. Requests with a null address or a zero size are ignored.
void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
    if (addr != 0 && size != 0) {
        res_cache.InvalidateRegion(addr, size);
        shader_cache.InvalidateRegion(addr, size);
        global_cache.InvalidateRegion(addr, size);
        buffer_cache.InvalidateRegion(addr, size);
    }
}
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
/// Runs FlushRegion followed by InvalidateRegion on the same range, in that order.
void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
FlushRegion(addr, size);
InvalidateRegion(addr, size);
}
@@ -782,7 +788,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)};
const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
if (!surface) {
return {};
}

View File

@@ -57,9 +57,9 @@ public:
void DrawArrays() override;
void Clear() override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const Common::Rectangle<u32>& src_rect,
@@ -67,7 +67,7 @@ public:
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
bool AccelerateDrawBatch(bool is_indexed) override;
void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override;
void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
void LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;

View File

@@ -55,12 +55,11 @@ static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
}
}
void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
void SurfaceParams::InitCacheParameters(GPUVAddr gpu_addr_) {
auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)};
addr = cpu_addr ? *cpu_addr : 0;
gpu_addr = gpu_addr_;
host_ptr = memory_manager.GetPointer(gpu_addr_);
size_in_bytes = SizeInBytesRaw();
if (IsPixelFormatASTC(pixel_format)) {
@@ -223,7 +222,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
}
/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format,
u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
u32 block_width, u32 block_height, u32 block_depth,
Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
SurfaceParams params{};
@@ -446,7 +445,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
gl_buffer.data() + offset_gl, gl_size, params.addr + offset);
gl_buffer.data() + offset_gl, params.host_ptr + offset);
offset += layer_size;
offset_gl += gl_size;
}
@@ -455,7 +454,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
params.MipBlockDepth(mip_level), depth, params.tile_width_spacing,
gl_buffer.data(), gl_buffer.size(), params.addr + offset);
gl_buffer.data(), params.host_ptr + offset);
}
}
@@ -513,9 +512,9 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
"reinterpretation but the texture is tiled.");
}
const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size,
Memory::GetPointer(dst_params.addr + src_params.size_in_bytes));
memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes));
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -563,8 +562,14 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
}
CachedSurface::CachedSurface(const SurfaceParams& params)
: params(params), gl_target(SurfaceTargetToGL(params.target)),
cached_size_in_bytes(params.size_in_bytes) {
: params{params}, gl_target{SurfaceTargetToGL(params.target)},
cached_size_in_bytes{params.size_in_bytes}, RasterizerCacheObject{params.host_ptr} {
const auto optional_cpu_addr{
Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(params.gpu_addr)};
ASSERT_MSG(optional_cpu_addr, "optional_cpu_addr is invalid");
cpu_addr = *optional_cpu_addr;
texture.Create(gl_target);
// TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
@@ -603,19 +608,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
ApplyTextureDefaults(texture.handle, params.max_mip_level);
OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, params.IdentityString());
// Clamp size to mapped GPU memory region
// TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
// R32F render buffer. We do not yet know if this is a game bug or something else, but this
// check is necessary to prevent flushing from overwriting unmapped memory.
auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
const u64 max_size{memory_manager.GetRegionEnd(params.gpu_addr) - params.gpu_addr};
if (cached_size_in_bytes > max_size) {
LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size);
cached_size_in_bytes = max_size;
}
OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
}
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
@@ -633,10 +626,9 @@ void CachedSurface::LoadGLBuffer() {
const u32 bpp = params.GetFormatBpp() / 8;
const u32 copy_size = params.width * bpp;
if (params.pitch == copy_size) {
std::memcpy(gl_buffer[0].data(), Memory::GetPointer(params.addr),
params.size_in_bytes_gl);
std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl);
} else {
const u8* start = Memory::GetPointer(params.addr);
const u8* start{params.host_ptr};
u8* write_to = gl_buffer[0].data();
for (u32 h = params.height; h > 0; h--) {
std::memcpy(write_to, start, copy_size);
@@ -680,8 +672,6 @@ void CachedSurface::FlushGLBuffer() {
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
params.height, params.depth, true, true);
const u8* const texture_src_data = Memory::GetPointer(params.addr);
ASSERT(texture_src_data);
if (params.is_tiled) {
ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
params.block_width, static_cast<u32>(params.target));
@@ -691,9 +681,9 @@ void CachedSurface::FlushGLBuffer() {
const u32 bpp = params.GetFormatBpp() / 8;
const u32 copy_size = params.width * bpp;
if (params.pitch == copy_size) {
std::memcpy(Memory::GetPointer(params.addr), gl_buffer[0].data(), GetSizeInBytes());
std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes());
} else {
u8* start = Memory::GetPointer(params.addr);
u8* start{params.host_ptr};
const u8* read_to = gl_buffer[0].data();
for (u32 h = params.height; h > 0; h--) {
std::memcpy(start, read_to, copy_size);
@@ -927,12 +917,12 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
}
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
if (params.addr == 0 || params.height * params.width == 0) {
if (!params.IsValid()) {
return {};
}
// Look up surface in the cache based on address
Surface surface{TryGet(params.addr)};
Surface surface{TryGet(params.host_ptr)};
if (surface) {
if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
// Use the cached surface as-is unless it's not synced with memory
@@ -943,7 +933,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
// If surface parameters changed and we care about keeping the previous data, recreate
// the surface from the old one
Surface new_surface{RecreateSurface(surface, params)};
UnregisterSurface(surface);
Unregister(surface);
Register(new_surface);
if (new_surface->IsUploaded()) {
RegisterReinterpretSurface(new_surface);
@@ -951,7 +941,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
return new_surface;
} else {
// Delete the old surface before creating a new one to prevent collisions.
UnregisterSurface(surface);
Unregister(surface);
}
}
@@ -981,14 +971,16 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
const Surface& dst_surface) {
const auto& init_params{src_surface->GetSurfaceParams()};
const auto& dst_params{dst_surface->GetSurfaceParams()};
VAddr address = init_params.addr;
const std::size_t layer_size = dst_params.LayerMemorySize();
auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
GPUVAddr address{init_params.gpu_addr};
const std::size_t layer_size{dst_params.LayerMemorySize()};
for (u32 layer = 0; layer < dst_params.depth; layer++) {
for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap);
const Surface& copy = TryGet(sub_address);
if (!copy)
const GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))};
if (!copy) {
continue;
}
const auto& src_params{copy->GetSurfaceParams()};
const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))};
const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))};
@@ -1163,7 +1155,8 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
const auto& dst_params{dst_surface->GetSurfaceParams()};
// Flush enough memory for both the source and destination surface
FlushRegion(src_params.addr, std::max(src_params.MemorySize(), dst_params.MemorySize()));
FlushRegion(ToCacheAddr(src_params.host_ptr),
std::max(src_params.MemorySize(), dst_params.MemorySize()));
LoadSurface(dst_surface);
}
@@ -1215,8 +1208,8 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
return new_surface;
}
Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const {
return TryGet(addr);
/// Looks up a surface by the host pointer of its backing memory. This is a thin wrapper around
/// TryGet, so the result is a null Surface when nothing is registered at that pointer.
Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const {
return TryGet(host_ptr);
}
void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) {
@@ -1243,9 +1236,9 @@ static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfacePar
return {};
}
static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) {
const std::size_t size = params.LayerMemorySize();
VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap);
static std::optional<u32> TryFindBestLayer(GPUVAddr addr, const SurfaceParams params, u32 mipmap) {
const std::size_t size{params.LayerMemorySize()};
GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)};
for (u32 i = 0; i < params.depth; i++) {
if (start == addr) {
return {i};
@@ -1267,7 +1260,7 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa
src_params.height == dst_params.MipHeight(*level) &&
src_params.block_height >= dst_params.MipBlockHeight(*level)) {
const std::optional<u32> slot =
TryFindBestLayer(render_surface->GetAddr(), dst_params, *level);
TryFindBestLayer(render_surface->GetSurfaceParams().gpu_addr, dst_params, *level);
if (slot.has_value()) {
glCopyImageSubData(render_surface->Texture().handle,
SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
@@ -1283,8 +1276,8 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa
}
static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize();
const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize();
const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize();
const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize();
if (bound2 > bound1)
return true;
const auto& dst_params = blitted_surface->GetSurfaceParams();
@@ -1302,12 +1295,12 @@ static bool IsReinterpretInvalidSecond(const Surface render_surface,
bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
Surface intersect) {
if (IsReinterpretInvalid(triggering_surface, intersect)) {
UnregisterSurface(intersect);
Unregister(intersect);
return false;
}
if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
UnregisterSurface(intersect);
Unregister(intersect);
return false;
}
FlushObject(intersect);
@@ -1327,7 +1320,8 @@ void RasterizerCacheOpenGL::SignalPreDrawCall() {
void RasterizerCacheOpenGL::SignalPostDrawCall() {
for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
if (current_color_buffers[i] != nullptr) {
Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr());
Surface intersect =
CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr());
if (intersect != nullptr) {
PartialReinterpretSurface(current_color_buffers[i], intersect);
texception = true;

View File

@@ -109,6 +109,11 @@ struct SurfaceParams {
return size;
}
/// Reports whether these parameters describe a usable rasterizer surface: the GPU address and
/// the host pointer must both be non-null and the surface must have a non-zero width and height.
bool IsValid() const {
    const bool has_backing_memory = gpu_addr != 0 && host_ptr != nullptr;
    const bool has_extent = height != 0 && width != 0;
    return has_backing_memory && has_extent;
}
/// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including
/// mipmaps.
std::size_t LayerMemorySize() const {
@@ -210,7 +215,7 @@ struct SurfaceParams {
/// Creates SurfaceParams for a depth buffer configuration
static SurfaceParams CreateForDepthBuffer(
u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format,
u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
u32 block_width, u32 block_height, u32 block_depth,
Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
@@ -232,7 +237,7 @@ struct SurfaceParams {
}
/// Initializes parameters for caching, should be called after everything has been initialized
void InitCacheParameters(Tegra::GPUVAddr gpu_addr);
void InitCacheParameters(GPUVAddr gpu_addr);
std::string TargetName() const {
switch (target) {
@@ -296,8 +301,8 @@ struct SurfaceParams {
bool is_array;
bool srgb_conversion;
// Parameters used for caching
VAddr addr;
Tegra::GPUVAddr gpu_addr;
u8* host_ptr;
GPUVAddr gpu_addr;
std::size_t size_in_bytes;
std::size_t size_in_bytes_gl;
@@ -345,10 +350,10 @@ class RasterizerOpenGL;
class CachedSurface final : public RasterizerCacheObject {
public:
CachedSurface(const SurfaceParams& params);
explicit CachedSurface(const SurfaceParams& params);
VAddr GetAddr() const override {
return params.addr;
VAddr GetCpuAddr() const override {
return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
@@ -432,6 +437,7 @@ private:
std::size_t memory_size;
bool reinterpreted = false;
bool must_reload = false;
VAddr cpu_addr{};
};
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -449,7 +455,7 @@ public:
Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
/// Tries to find a framebuffer based on the provided CPU address
Surface TryFindFramebufferSurface(VAddr addr) const;
Surface TryFindFramebufferSurface(const u8* host_ptr) const;
/// Copies the contents of one surface to another
void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
@@ -506,12 +512,12 @@ private:
std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
Surface last_depth_buffer;
using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>;
using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
static auto GetReinterpretInterval(const Surface& object) {
return SurfaceInterval::right_open(object->GetAddr() + 1,
object->GetAddr() + object->GetMemorySize() - 1);
return SurfaceInterval::right_open(object->GetCacheAddr() + 1,
object->GetCacheAddr() + object->GetMemorySize() - 1);
}
// Reinterpreted surfaces are very fragile as the game may keep rendering into them.
@@ -523,7 +529,7 @@ private:
reinterpret_surface->MarkReinterpreted();
}
Surface CollideOnReinterpretedSurface(VAddr addr) const {
Surface CollideOnReinterpretedSurface(CacheAddr addr) const {
const SurfaceInterval interval{addr};
for (auto& pair :
boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
@@ -532,13 +538,17 @@ private:
return nullptr;
}
void Register(const Surface& object) {
RasterizerCache<Surface>::Register(object);
}
/// Unregisters an object from the cache
void UnregisterSurface(const Surface& object) {
void Unregister(const Surface& object) {
if (object->IsReinterpreted()) {
auto interval = GetReinterpretInterval(object);
reinterpreted_surfaces.erase(interval);
}
Unregister(object);
RasterizerCache<Surface>::Unregister(object);
}
};

Some files were not shown because too many files have changed in this diff Show More