Compare commits

...

174 Commits

Author SHA1 Message Date
zhupengfei
39e895c5ff citra_qt: Settings (configuration) rework 2019-03-07 16:55:50 +01:00
bunnei
4f352833a5 Merge pull request #2055 from bunnei/gpu-thread
Asynchronous GPU command processing
2019-03-07 10:41:53 -05:00
bunnei
076c76f4e4 Merge pull request #2149 from ReinUsesLisp/decoders-style
gl_rasterizer_cache: Move format conversion functions to their own file
2019-03-06 21:56:20 -05:00
bunnei
ed0bdcc638 Merge pull request #2197 from lioncash/include
core/hle/ipc: Remove unnecessary includes
2019-03-06 21:55:16 -05:00
bunnei
84ad81ee67 gpu_thread: Fix deadlock with threading idle state check. 2019-03-06 21:48:57 -05:00
bunnei
63aa08acbe gpu_thread: (HACK) Ignore flush on FlushAndInvalidateRegion. 2019-03-06 21:48:57 -05:00
bunnei
3f1b4fb23a gpu: Always flush. 2019-03-06 21:48:57 -05:00
bunnei
aaa373585c gpu: Refactor a/synchronous implementations into their own classes. 2019-03-06 21:48:57 -05:00
bunnei
7b574f406b gpu: Move command processing to another thread. 2019-03-06 21:48:57 -05:00
bunnei
65651078e5 bootmanager: Ensure that we have a context for shader loading. 2019-03-06 21:48:56 -05:00
bunnei
d2ff93c319 Merge pull request #2190 from lioncash/ogl-global
core: Remove the global telemetry accessor function
2019-03-06 21:41:53 -05:00
bunnei
ac51d048a9 gpu: Refactor command and swap buffers interface for asynch. 2019-03-06 21:09:09 -05:00
bunnei
4483089d70 gpu: Refactor to take RendererBase instead of RasterizerInterface. 2019-03-06 21:09:09 -05:00
bunnei
d6015ee211 settings: Add new graphics setting for use_asynchronous_gpu_emulation. 2019-03-06 21:09:09 -05:00
bunnei
81e086b5ac core: Set is_powered_on before GPU is initialized. 2019-03-06 21:07:33 -05:00
bunnei
75b417489a Merge pull request #2199 from lioncash/arbiter
kernel/address_arbiter: Convert the address arbiter into a class
2019-03-06 15:55:56 -05:00
bunnei
e9b05e86b9 Merge pull request #2201 from lioncash/audio-retval
hle/service/audio/audout_u: Correct lack of return in failure case of AppendAudioOutBufferImpl()
2019-03-06 14:20:04 -05:00
bunnei
8ee78521fa Merge pull request #2204 from lioncash/wait-tree
yuzu/debugger/wait_tree: Remove use of global CurrentProcess accessor
2019-03-06 14:17:34 -05:00
Lioncash
9ac176d5a3 hle/service/audio/audout_u: Correct lack of return in failure case of AppendAudioOutBufferImpl()
Previously we were overwriting the error case with a success code
further down (which is definitely not what we should be doing here).
2019-03-06 11:44:32 -05:00
bunnei
234f00bdd4 Merge pull request #2194 from lioncash/mem
svc: Move memory range checking functions to the VMManager class
2019-03-06 11:43:07 -05:00
bunnei
5a57b1a09b Merge pull request #2200 from lioncash/audio
hle/service/audio: Extract audio error codes to a header
2019-03-06 10:52:45 -05:00
bunnei
22f105c06d Merge pull request #2203 from lioncash/engines-include
video_core/engines: Remove unnecessary includes
2019-03-06 10:51:27 -05:00
bunnei
10f08ab9ec Merge pull request #2198 from lioncash/todo
{kernel/thread, video_core/surface}: Remove obsolete TODOs
2019-03-06 10:51:03 -05:00
Lioncash
196cc82913 yuzu/debugger/wait_tree: Remove use of global CurrentProcess accessor
We already have the thread instance that was created under the current
process, so we can just pass the handle table of it along to retrieve
the owner of the mutex.
2019-03-05 21:52:21 -05:00
Lioncash
f9ee0dc7ee video_core/engines: Remove unnecessary includes
Removes a few unnecessary dependencies on core-related machinery, such
as the core.h and memory.h, which reduces the amount of rebuilding
necessary if those files change.

This also uncovered some indirect dependencies within other source
files. This also fixes those.
2019-03-05 20:35:32 -05:00
Lioncash
ad9dbeb44b hle/service/audio: Extract audio error codes to a header
Places all error codes in an easily includable header.

This also corrects the unsupported error code (I accidentally used the
hex value when I meant to use the decimal one).
2019-03-05 16:51:37 -05:00
Lioncash
c161389a0f kernel/address_arbiter: Pass in system instance to constructor
Allows getting rid of reliance on the global accessor functions and
instead operating on the provided system instance.
2019-03-05 15:47:03 -05:00
Lioncash
9d9676f620 kernel/address_arbiter: Minor tidying up
- Invert conditions into guard clases where applicable.
- Mark std::vector parameter of WakeThreads as const
2019-03-05 12:58:31 -05:00
Lioncash
ec6664f6d6 kernel/address_arbiter: Convert the address arbiter into a class
Places all of the functions for address arbiter operation into a class.
This will be necessary for future deglobalizing efforts related to both
the memory and system itself.
2019-03-05 12:58:26 -05:00
Lioncash
42085ff110 video_core/surface: Remove obsolete TODO in PixelFormatFromRenderTargetFormat()
This isn't needed anymore, according to Hexagon
2019-03-05 10:15:06 -05:00
Lioncash
79f970e6de kernel/thread: Remove obsolete TODO in Create()
This is a TODO carried over from Citra that doesn't apply here.
2019-03-05 10:05:49 -05:00
Lioncash
02bc9e9de1 core/hle/ipc: Remove unnecessary includes
Removes a few inclusion dependencies from the headers or replaces
existing ones with ones that don't indirectly include the required
headers.

This allows removing an inclusion of core/memory.h, meaning that if the
memory header is ever changed in the future, it won't result in
rebuilding the entirety of the HLE services (as the IPC headers are used
quite ubiquitously throughout the HLE service implementations).
2019-03-05 09:53:38 -05:00
bunnei
cc92c054ec Merge pull request #2185 from FearlessTobi/port-4630
Port citra-emu/citra#4630: "Memory: don't lock hle mutex in memory read/write"
2019-03-04 18:44:53 -05:00
Lioncash
40de7f6fe8 vm_manager: Use range helpers in HeapAlloc() and HeapFree()
Significantly tidies up two guard conditionals.
2019-03-04 17:16:52 -05:00
Lioncash
6c42a23550 vm_manager: Provide address range checking functions for other memory regions
Makes the interface uniform when it comes to checking various memory
regions.
2019-03-04 17:08:55 -05:00
Lioncash
0be8fffc99 svc: Migrate address range checking functions to VMManager
Provides a bit of a more proper interface for these functions.
2019-03-04 16:32:03 -05:00
bunnei
07e13d6728 Merge pull request #2165 from ReinUsesLisp/unbind-tex
gl_rasterizer: Unbind textures but don't apply the gl_state
2019-03-04 13:51:59 -05:00
bunnei
6ad66acce2 Merge pull request #2188 from lioncash/log-static
logging/backend: Move CreateEntry into the Impl class. Relocate local static to a class variable
2019-03-04 13:46:01 -05:00
Lioncash
b114928459 core/core: Remove the global telemetry accessor function
With all usages converted off of it, this function can be removed.
2019-03-04 10:24:13 -05:00
Lioncash
319365fdf0 yuzu: Remove usage of the global telemetry accessor
In these cases the system object is nearby, and in the other, the
long-form of accessing the telemetry instance is already used, so we can
get rid of the use of the global accessor.
2019-03-04 10:24:13 -05:00
Lioncash
697a4669e1 yuzu-cmd/yuzu: Replace direct usage of the global system telemetry accessor in main()
We already have the system instance around, so we can use that instead
of the accessor.
2019-03-04 10:24:13 -05:00
Lioncash
b5f0dc95db core/core: Replace direct usage of the global system telemetry accessor from Shutdown()
The telemetry instance is actually a member of the class itself, so we
can access it directly instead of going through the global accessor.
2019-03-04 10:24:13 -05:00
Lioncash
90febaf717 video_core/renderer_opengl: Replace direct usage of global system object accessors
We already pass a reference to the system object to the constructor of the renderer,
so we can just use that instead of using the global accessor functions.
2019-03-04 10:24:09 -05:00
bunnei
be6bf37224 Merge pull request #2189 from lioncash/web
web_service: Remove unnecessary inclusions
2019-03-03 22:56:49 -05:00
Lioncash
aa30fd75cd web_service: Remove unnecessary inclusions
Reduces the potential amount of rebuilding necessary if any headers
change. In particular, we were including a header from the core library
when we don't even link the core library to the web_service library, so
this also gets rid of an indirect dependency.
2019-03-02 14:58:49 -05:00
Mat M
169d19f7b9 Merge pull request #2154 from FearlessTobi/port-4647
Port citra-emu/citra#4647: "citra_qt/main: make SPEED_LIMIT_STEP static constexpr"
2019-03-02 14:46:04 -05:00
Lioncash
f8f1ff0b4f logging/backend: Make time_origin a class variable instead of a local static
Moves local global state into the Impl class itself and initializes it
at the creation of the instance instead of in the function.

This makes it nicer for weakly-ordered architectures, given the
CreateEntry() class won't need to have atomic loads executed for each
individual call to the CreateEntry class.
2019-03-02 14:44:24 -05:00
Lioncash
43c1092031 logging/backend: Move CreateEntry into the Impl class
This function is only ever used within this source file and makes it
easier to remove static state in the following change.
2019-03-02 14:44:24 -05:00
Mat M
a461e266ea Merge pull request #2183 from ReinUsesLisp/vk-buffer-cache-clang
vk_buffer_cache: Fix clang-format
2019-03-02 14:43:15 -05:00
James Rowe
2e2f6aa71a Merge pull request #2186 from honzapatCZ/patch-1
Yuzu can render 3D.
2019-03-02 10:10:01 -07:00
fearlessTobi
71c30a0a89 citra_qt/main: make SPEED_LIMIT_STEP static constexpr
MSVC does not seem to like using constexpr values in a lambda that were declared outside of it.
Previously on MSVC build the hotkeys to inc-/decrease the speed limit were not working correctly because in the lambda the SPEED_LIMIT_STEP had garbage values.
After googling around a bit I found: https://github.com/codeplaysoftware/computecpp-sdk/issues/95 which seems to be a similar issue.
Trying the suggested fix to make the variable static constexpr also fixes the bug here.
2019-03-02 17:43:19 +01:00
Nejcraft
90fd257b47 Yuzu can render 3D.
Yuzu can now render 3D graphics to some degree.
2019-03-02 17:23:05 +01:00
Weiyi Wang
5159f4eee8 Memory: don't lock hle mutex in memory read/write
The comment already invalidates itself: neither MMIO nor rasterizer cache belongsHLE kernel state. This mutex has a too large scope if MMIO or cache is included, which is prone to dead lock when multiple thread acquires these resource at the same time. If necessary, each MMIO component or rasterizer should have their own lock.
2019-03-02 15:20:05 +01:00
bunnei
3c39b39bbc Merge pull request #2182 from bunnei/my-wasted-friday
fuck git for ruining my day, I will learn but I will not forgive
2019-03-02 00:57:15 -05:00
ReinUsesLisp
8e84e81e74 vk_buffer_cache: Fix clang-format 2019-03-02 02:16:45 -03:00
bunnei
e22670fbc3 Merge pull request #2178 from ReinUsesLisp/vk-buffer-cache
vk_buffer_cache: Implement a buffer cache
2019-03-02 00:13:33 -05:00
bunnei
ab70c2583d fuck git for ruining my day, I will learn but I will not forgive 2019-03-02 00:01:34 -05:00
ReinUsesLisp
35c105a108 vk_buffer_cache: Implement a buffer cache
This buffer cache is just like OpenGL's buffer cache with some minor
style changes. It uses VKStreamBuffer.
2019-03-01 17:33:36 -03:00
bunnei
1da8a0c2a8 Merge pull request #2173 from lioncash/capture
yuzu/compatdb: Remove unused lambda capture
2019-03-01 09:55:35 -05:00
bunnei
12e74fe801 Merge pull request #2180 from lioncash/audren
service/audio: Provide an implementation of ExecuteAudioRendererRendering
2019-03-01 09:50:14 -05:00
bunnei
115fc6120c Merge pull request #2181 from lioncash/audren2
service/audio/audren_u: Implement OpenAudioRendererAuto
2019-03-01 09:49:23 -05:00
Lioncash
84aff56644 service/audio/audren_u: Implement OpenAudioRendererAuto
This currently has the same behavior as the regular
OpenAudioRenderer API function, so we can just move the code within
OpenAudioRenderer to an internal function that both service functions
call.
2019-03-01 05:40:29 -05:00
Lioncash
42dc73157c service/audio: Provide an implementation of ExecuteAudioRendererRendering
This service function appears to do nothing noteworthy on the switch.
All it does at the moment is either return an error code or abort the
system. Given we obviously don't want to kill the system, we just opt
for always returning the error code.
2019-03-01 03:37:00 -05:00
ReinUsesLisp
e85066dac7 gl_rasterizer: Remove texture unbinding after dispatching a draw call
Unbinding was required when OpenGL delete operations didn't unbind a
resource if it was bound. This is no longer needed and can be removed.
2019-02-28 00:17:50 -03:00
ReinUsesLisp
bb3ab7d66c gl_state: Fixup multibind bug 2019-02-28 00:17:03 -03:00
bunnei
49c6d21b31 Merge pull request #2174 from lioncash/fwd
service/hid: Amend forward declaration of ServiceManager
2019-02-27 21:20:06 -05:00
bunnei
1b13859af8 Merge pull request #2152 from ReinUsesLisp/vk-stream-buffer
vk_stream_buffer: Implement a stream buffer
2019-02-27 21:19:15 -05:00
bunnei
1f5d6a8fed Merge pull request #2121 from FernandoS27/texception2
Improve the Accuracy of the Rasterizer Cache through a Texception Pass
2019-02-27 21:17:55 -05:00
bunnei
66f4fd4c81 Merge pull request #2172 from lioncash/reorder
gl_rasterizer/vk_memory_manager: Silence -Wreorder warnings
2019-02-27 21:14:20 -05:00
Fernando Sahmkow
7ea097e5c2 Devirtualize Register/Unregister and use a wrapper instead. 2019-02-27 21:58:50 -04:00
Fernando Sahmkow
5a9204dbd7 Corrections and redesign. 2019-02-27 21:58:49 -04:00
Fernando Sahmkow
d6b9b51606 Fix linux compile error. 2019-02-27 21:58:48 -04:00
Fernando Sahmkow
e64fa4d2ea Remove NotifyFrameBuffer as we are doing a texception pass every drawcall. 2019-02-27 21:58:47 -04:00
Fernando Sahmkow
3558c88442 Remove certain optimizations that caused texception to fail in certain scenarios. 2019-02-27 21:58:45 -04:00
Fernando Sahmkow
e9d84ef22c Bug fixes and formatting 2019-02-27 21:58:44 -04:00
Fernando Sahmkow
5bc82d124c rasterizer_cache_gl: Implement Texception Pass 2019-02-27 21:58:43 -04:00
Fernando Sahmkow
8932001610 rasterizer_cache_gl: Implement Partial Reinterpretation of Surfaces. 2019-02-27 21:58:40 -04:00
Fernando Sahmkow
44ea2810e4 rasterizer_cache: mark reinterpreted surfaces and add ability to reload marked surfaces on next use. 2019-02-27 21:58:39 -04:00
Fernando Sahmkow
d583fc1e97 rasterizer_cache_gl: Notify on framebuffer change 2019-02-27 21:58:37 -04:00
Fernando Sahmkow
45b6d2d349 rasterizer_cache: Expose FlushObject to Child classes and allow redefining of Register and Unregister 2019-02-27 21:57:33 -04:00
bunnei
f15e2dd881 Merge pull request #2163 from ReinUsesLisp/bitset-dirty
maxwell_3d: Use std::bitset to manage dirty flags
2019-02-27 20:50:08 -05:00
Annomatg
ef84c70d22 Speed up memory page mapping (#2141)
- Memory::MapPages total samplecount was reduced from 4.6% to 1.06%.
- From main menu into the game from 1.03% to 0.35%
2019-02-27 17:22:47 -05:00
bunnei
532dda0499 Merge pull request #2176 from lioncash/com
audio_core/cubeb_sink: Ensure COM is initialized on Windows prior to calling cubeb_init
2019-02-27 17:12:06 -05:00
Lioncash
1068c1b06f audio_core/cubeb_sink: Ensure COM is initialized on Windows prior to calling cubeb_init
cubeb now requires that COM explicitly be initialized on the thread
prior to calling cubeb_init.
2019-02-27 16:14:53 -05:00
Lioncash
6335bf136f service/hid: Amend forward declaration of ServiceManager
The SM namespace is within the Service namespace, so this was forward
declaring a type that didn't exist.
2019-02-27 11:36:48 -05:00
Lioncash
456c7043bd yuzu/compatdb: Remove unused lambda capture
Silences a compiler warning with clang.
2019-02-27 11:30:36 -05:00
bunnei
42f7c11021 Merge pull request #2169 from lioncash/naming
audio_core/audio_renderer: Provide names for some parameters of AudioRendererParameter
2019-02-27 11:26:54 -05:00
bunnei
14430f7df9 Merge pull request #2170 from lioncash/emu-window
core/frontend/emu_window: Make ClipToTouchScreen a const member function
2019-02-27 11:26:24 -05:00
bunnei
eb5a3dd1c7 Merge pull request #2161 from lioncash/handle-table
kernel/handle_table: Allow process capabilities to limit the handle table size
2019-02-27 11:22:26 -05:00
bunnei
be1a1584fc Merge pull request #2168 from lioncash/cubeb
externals: Update cubeb to the master version
2019-02-27 11:20:14 -05:00
bunnei
66e023fba2 Merge pull request #2167 from lioncash/namespace
common: Move Quaternion, Rectangle, Vec2, Vec3, and Vec4 into the Common namespace
2019-02-27 11:19:53 -05:00
bunnei
b27e6ad912 Merge pull request #2171 from lioncash/pragma
gl_shader_disk_cache: Remove #pragma once from cpp file
2019-02-27 11:19:17 -05:00
Lioncash
16ea93c11e vk_memory_manager: Reorder constructor initializer list in terms of member declaration order
Reorders members in the order that they would actually be initialized
in. Silences a -Wreorder warning.
2019-02-27 11:08:19 -05:00
Lioncash
a6a783b3dc gl_rasterizer: Reorder constructor initializer list in terms of member declaration order
Orders the members in the order they would actually be initialized in.
Silences a -Wreorder warning.
2019-02-27 11:08:19 -05:00
Lioncash
e7eff72e83 gl_shader_disk_cache: Remove #pragma once from cpp file
This is only necessary in headers. Silences a warning with clang.
2019-02-27 11:02:49 -05:00
Lioncash
46b3209abb core/frontend/emu_window: Make ClipToTouchScreen a const member function
This member function doesn't modify instance state, so it can have the
const specifier applied to it.
2019-02-27 08:54:42 -05:00
Lioncash
0e1b5acc6a audio_core/audio_renderer: Name previously unknown parameters of AudioRendererParameter
Provides names for previously unknown entries (aside from the two u8
that appear to be padding bytes, and a single word that also appears
to be reserved or padding).

This will be useful in subsequent changes when unstubbing behavior related
to the audio renderer services.
2019-02-27 06:09:07 -05:00
Lioncash
b9238edd0d common/math_util: Move contents into the Common namespace
These types are within the common library, so they should be within the
Common namespace.
2019-02-27 03:38:39 -05:00
Lioncash
e56f32a071 externals: Update cubeb to 6f2420de8f155b10330cf973900ac7bdbfee589d
Keeps the audio library we use up to date.
2019-02-27 01:21:51 -05:00
Lioncash
1b855efd5e common/vector_math: Move Vec[x] types into the Common namespace
These types are within the common library, so they should be using the
Common namespace.
2019-02-26 22:38:36 -05:00
Lioncash
a1574aabd5 common/quaternion: Move Quaternion into the Common namespace
Quaternion is within the common library, so it should be using the
Common namespace.
2019-02-26 22:31:17 -05:00
ReinUsesLisp
0ad3c031f4 gl_rasterizer_cache: Move format conversion to its own file 2019-02-26 20:08:27 -03:00
ReinUsesLisp
0ccd490fcd decoders: Minor style changes 2019-02-26 20:08:27 -03:00
bunnei
10d1d58390 Merge pull request #2164 from ReinUsesLisp/configure-blit
renderer_opengl: Update pixel format tracking
2019-02-26 12:12:10 -05:00
ReinUsesLisp
d91e35a50a renderer_opengl: Update pixel format tracking 2019-02-26 03:47:16 -03:00
ReinUsesLisp
5219edd715 maxwell_3d: Use std::bitset to manage dirty flags 2019-02-26 03:01:48 -03:00
ReinUsesLisp
730eb1dad7 vk_stream_buffer: Remove copy code path 2019-02-26 02:09:43 -03:00
bunnei
c3471bf618 Merge pull request #2156 from FreddyFunk/patch-1
file_sys/vfs_vector: Fix ignored offset on Write
2019-02-25 18:28:58 -05:00
bunnei
da1b45de34 Merge pull request #2158 from lioncash/table
service/vi: Update IManagerDisplayService's function table
2019-02-25 18:27:43 -05:00
bunnei
1cffd3848b Merge pull request #2160 from lioncash/audio-warn
audio_core: Resolve compilation warnings
2019-02-25 18:25:36 -05:00
bunnei
93c1630570 Merge pull request #2159 from lioncash/warn
shader/track: Resolve variable shadowing warnings
2019-02-25 13:26:00 -05:00
Lioncash
d29f9e9709 kernel/handle_table: Make local variables as const where applicable
Makes immutable state explicit.
2019-02-25 11:12:38 -05:00
Lioncash
5167d1577d kernel/handle_table: Allow process capabilities to limit the handle table size
The kernel allows restricting the total size of the handle table through
the process capability descriptors. Until now, this functionality wasn't
hooked up. With this, the process handle tables become properly restricted.

In the case of metadata-less executables, the handle table will assume
the maximum size is requested, preserving the behavior that existed
before these changes.
2019-02-25 11:12:32 -05:00
Lioncash
4f8cd74061 kernel/handle-table: In-class initialize data members
Directly initializes members where applicable.
2019-02-25 10:14:05 -05:00
Lioncash
0220862ba5 kernel/handle_table: Resolve truncation warnings
Avoids implicit truncation warnings from u32 -> u16 (the truncation is
desirable behavior here).
2019-02-25 09:53:21 -05:00
Lioncash
04d7b7e09d audio_core/cubeb_sink: Initialize CubebSinkStream's last_frame data member
Ensures that all member variables are initialized in a deterministic
manner across the board.
2019-02-25 09:40:37 -05:00
Lioncash
8250f9bb1c audio_core/cubeb_sink: Add override specifier to destructor
CubebSinkStream inherits from a base class with a virtual destructor, so
override can be appended to CubebSinkStream's destructor.
2019-02-25 09:38:27 -05:00
Lioncash
7cdeec20ec audio_core/cubeb_sink: Resolve variable shadowing warnings in SamplesInQueue
The name of the parameter was shadowing the member variable of the same
name. Instead, alter the name of the parameter to prevent said
shadowing.
2019-02-25 09:28:51 -05:00
Lioncash
a12f4efa2f audio_core/codec: Resolve truncation warnings within DecodeADPCM
The assignments here were performing an implicit truncation from int to
s16. Make it explicit that this is desired behavior.
2019-02-25 09:24:39 -05:00
Lioncash
c1b2e35625 shader/track: Resolve variable shadowing warnings 2019-02-25 09:10:59 -05:00
Lioncash
be7dad5e7e service/vi: Update IManagerDisplayService's function table
Amends it to add the 7.0.0+ CreateStrayLayer function.
2019-02-25 08:09:00 -05:00
bunnei
c07987dfab Merge pull request #2118 from FernandoS27/ipa-improve
shader_decompiler: Improve Accuracy of Attribute Interpolation.
2019-02-24 23:04:22 -05:00
bunnei
c4243c07cc Merge pull request #2119 from FernandoS27/fix-copy
rasterizer_cache_gl: Only do fast layered copy on the same format.
2019-02-24 23:03:52 -05:00
bunnei
c6170565b5 Merge pull request #2155 from FearlessTobi/port-4655
Port citra-emu/citra#4655: "Remove GCC version checks"
2019-02-24 23:03:13 -05:00
bunnei
57985fb16a Merge pull request #2144 from lioncash/factor
service/vi: Convert Display and Layer structs into classes
2019-02-24 23:02:50 -05:00
Frederic L
517933adcb file_sys/vfs_vector: Fix ignored offset on Write 2019-02-25 00:27:49 +01:00
tgsm
030814b1cb Remove GCC version checks
Citra can't be compiled using GCC <7 because of required C++17 support, so these version checks don't need to exist anymore.
2019-02-24 15:24:06 +01:00
bunnei
90c780e6f3 Merge pull request #2139 from degasus/dma_pusher
video_core/dma_pusher: The full list of headers at once.
2019-02-24 04:15:49 -05:00
ReinUsesLisp
33a0597603 vk_stream_buffer: Implement a stream buffer
This manages two kinds of streaming buffers: one for unified memory
models and one for dedicated GPUs. The first one skips the copy from the
staging buffer to the real buffer, since it creates an unified buffer.

This implementation waits for all fences to finish their operation
before "invalidating". This is suboptimal since it should allocate
another buffer or start searching from the beginning. There is room for
improvement here.

This could also handle AMD's "pinned" memory (a heap with 256 MiB) that
seems to be designed for buffer streaming.
2019-02-24 04:27:51 -03:00
ReinUsesLisp
281a8bf259 vk_resource_manager: Minor VKFenceWatch changes 2019-02-24 04:19:04 -03:00
bunnei
f7090bacc5 Merge pull request #2146 from ReinUsesLisp/vulkan-scheduler
vk_scheduler: Implement a scheduler
2019-02-23 23:32:43 -05:00
bunnei
d062991643 Merge pull request #2150 from ReinUsesLisp/fixup-layer-swizzle
gl_rasterizer_cache: Fixup parameter order in layered swizzle
2019-02-23 23:31:34 -05:00
bunnei
4ab978d670 Merge pull request #2151 from ReinUsesLisp/fixup-vk-memory-manager
vk_memory_manager: Fixup commit interval allocation
2019-02-23 23:29:53 -05:00
ReinUsesLisp
92050c4d86 vk_memory_manager: Fixup commit interval allocation
VKMemoryCommitImpl was using as the end of its interval "begin + end".
That ended up wasting memory.
2019-02-24 01:04:41 -03:00
ReinUsesLisp
abef11a540 gl_rasterizer_cache: Fixup parameter order in layered swizzle 2019-02-23 23:27:30 -03:00
ReinUsesLisp
f546fb35ed vk_scheduler: Implement a scheduler
The scheduler abstracts command buffer and fence management with an
interface that's able to do OpenGL-like operations on Vulkan command
buffers.

It returns by value a command buffer and fence that have to be used for
subsequent operations until Flush or Finish is executed, after that the
current execution context (the pair of command buffers and fences) gets
invalidated a new one must be fetched. Thankfully validation layers will
quickly detect if this is skipped throwing an error due to modifications
to a sent command buffer.
2019-02-22 01:33:32 -03:00
bunnei
94b27bb8a5 Merge pull request #2138 from ReinUsesLisp/vulkan-memory-manager
vk_memory_manager: Implement memory manager
2019-02-21 22:26:54 -05:00
Lioncash
90528f1326 service/nvflinger: Store BufferQueue instances as regular data members
The NVFlinger service is already passed into services that need to
guarantee its lifetime, so the BufferQueue instances will already live
as long as they're needed. Making them std::shared_ptr instances in this
case is unnecessary.
2019-02-21 22:09:46 -05:00
Lioncash
fd15730767 service/vi/vi_layer: Convert Layer struct into a class
Like the previous changes made to the Display struct, this prepares the
Layer struct for changes to its interface. Given Layer will be given
more invariants in the future, we convert it into a class to better
signify that.
2019-02-21 12:13:09 -05:00
Lioncash
fa4dc2cf42 service/nvflinger: Move display specifics over to vi_display
With the display and layer structures relocated to the vi service, we
can begin giving these a proper interface before beginning to properly
support the display types.

This converts the display struct into a class and provides it with the
necessary functions to preserve behavior within the NVFlinger class.
2019-02-21 12:13:04 -05:00
bunnei
9539c4203b Merge pull request #2125 from ReinUsesLisp/fixup-glstate
gl_state: Synchronize gl_state even when state is disabled
2019-02-20 21:47:46 -05:00
bunnei
ae437320c8 Merge pull request #2130 from lioncash/system_engine
video_core: Remove usages of System::GetInstance() within the engines
2019-02-20 21:24:56 -05:00
Jungy
3273f93cd5 Fixes Unicode Key File Directories (#2120)
* Fixes Unicode Key File Directories

Adds code so that when loading a file it converts to UTF16 first, to
ensure the files can be opened. Code borrowed from FileUtil::Exists.

* Update src/core/crypto/key_manager.cpp

Co-Authored-By: Jungorend <Jungorend@users.noreply.github.com>

* Update src/core/crypto/key_manager.cpp

Co-Authored-By: Jungorend <Jungorend@users.noreply.github.com>

* Using FileUtil instead to be cleaner.

* Update src/core/crypto/key_manager.cpp

Co-Authored-By: Jungorend <Jungorend@users.noreply.github.com>
2019-02-20 21:24:25 -05:00
bunnei
ef559f5741 Merge pull request #2142 from lioncash/relocate
service/nvflinger: Relocate definitions of Layer and Display to the vi service
2019-02-20 21:21:55 -05:00
Lioncash
8d5d369b54 service/nvflinger: Relocate definitions of Layer and Display to the vi service
These are more closely related to the vi service as opposed to the
intermediary nvflinger.

This also places them in their relevant subfolder, as future changes to
these will likely result in subclassing to represent various displays
and services, as they're done within the service itself on hardware.

The reasoning for prefixing the display and layer source files is to
avoid potential clashing if two files with the same name are compiled
(e.g. if 'display.cpp/.h' or 'layer.cpp/.h' is added to another service
at any point), which MSVC will actually warn against. This prevents that
case from occurring.

This also presently coverts the std::array introduced within
f45c25aaba back to a std::vector to allow
the forward declaration of the Display type. Forward declaring a type
within a std::vector is allowed since the introduction of N4510
(http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4510.html) by
Zhihao Yuan.
2019-02-19 18:27:16 -05:00
Markus Wick
6dd40976d0 video_core/dma_pusher: Simplyfy Step() logic.
As fetching command list headers and and the list of command headers is a fixed 1:1 relation now, they can be implemented within a single call.
This cleans up the Step() logic quite a bit.
2019-02-19 10:28:42 +01:00
Markus Wick
717394c980 video_core/dma_pusher: The full list of headers at once.
Fetching every u32 from memory leads to a big overhead. So let's fetch all of them as a block if possible.
This reduces the Memory::* calls by the dma_pusher by a factor of 10.
2019-02-19 09:58:38 +01:00
ReinUsesLisp
b675c97cdd vk_memory_manager: Implement memory manager
A memory manager object handles the memory allocations for a device. It
allocates chunks of Vulkan memory objects and then suballocates.
2019-02-19 03:42:28 -03:00
bunnei
4bce08d497 Merge pull request #2122 from ReinUsesLisp/vulkan-resource-manager
vk_resource_manager: Implement fence and command buffer allocator
2019-02-18 21:05:28 -05:00
bunnei
2bb02a0b78 Merge pull request #2134 from lioncash/naming
audio_core/buffer: Make const and non-const getter for samples consistent
2019-02-17 11:26:33 -05:00
bunnei
e869c5ef1a Merge pull request #2133 from lioncash/arbiter
address_arbiter: Use nested namespaces where applicable
2019-02-16 15:37:21 -05:00
bunnei
4699fdca8f Merge pull request #2127 from FearlessTobi/fix-screenshot-srgb
renderer_opengl: respect the sRGB colorspace for the screenshot feature
2019-02-16 15:36:00 -05:00
bunnei
cd7e1183e2 Merge pull request #2128 from FearlessTobi/port-4197
Port citra-emu/citra#4197: "threadsafe_queue: Add PopWait and use it where possible "
2019-02-16 15:34:49 -05:00
Lioncash
b009bda67a audio_core/buffer: Make const and non-const getter for samples consistent
This way proper const/non-const selection can occur.
2019-02-16 15:21:35 -05:00
Lioncash
0113c36300 address_arbiter: Use nested namespaces where applicable
A fairly trivial change. Other sections of the codebase use nested
namespaces instead of separate namespaces here. This one must have just
been overlooked.
2019-02-16 12:41:30 -05:00
Lioncash
a8fa5019b5 video_core: Remove usages of System::GetInstance() within the engines
Avoids the use of the global accessor in favor of explicitly making the
system a dependency within the interface.
2019-02-15 22:06:23 -05:00
James Rowe
99da6362c4 Merge pull request #2123 from lioncash/coretiming-global
core_timing: De-globalize core_timing facilities
2019-02-15 19:52:11 -07:00
B3n30
2195f10d15 Adressed review comments 2019-02-15 22:14:54 +01:00
B3n30
4154936568 threadsafe_queue: Add WaitIfEmpty and use it in logging 2019-02-15 22:12:54 +01:00
fearlessTobi
9a56b99fa4 renderer_opengl: respect the sRGB colorspace for the screenshot feature
Previously, we were completely ignoring for screenshots whether the game uses RGB or sRGB.
This resulted in screenshot colors that looked off for some titles.
2019-02-15 21:27:29 +01:00
ReinUsesLisp
8dfc81239f gl_state: Synchronize gl_state even when state is disabled
There are some potential edge cases where gl_state may fail to track the
state if a related state changes while the toggle is disabled or it
didn't change. This addresses that.
2019-02-15 01:30:14 -03:00
bunnei
4327f430f1 Merge pull request #2112 from lioncash/shadowing
gl_rasterizer_cache: Get rid of variable shadowing
2019-02-14 21:45:20 -05:00
bunnei
a8fc5d6edd Merge pull request #2111 from ReinUsesLisp/fetch-fix
gl_shader_decompiler: Re-implement TLDS lod
2019-02-14 21:42:34 -05:00
ReinUsesLisp
ae6c052ed9 vk_resource_manager: Implement a command buffer pool with VKFencedPool 2019-02-14 18:44:26 -03:00
ReinUsesLisp
a2b6de7e9f vk_resource_manager: Add VKFencedPool interface
Handles a pool of resources protected by fences. Manages resource
overflow allocating more resources.

This class is intended to be used through inheritance.
2019-02-14 18:44:26 -03:00
ReinUsesLisp
0ffdd0a683 vk_resource_manager: Implement VKResourceManager and fence allocator
CommitFence iterates a pool of fences until one is found. If all fences
are being used at the same time, allocate more.
2019-02-14 18:44:26 -03:00
ReinUsesLisp
aa0b6babda vk_resource_manager: Implement VKFenceWatch
A fence watch is used to keep track of the usage of a fence and protect
a resource or set of resources without having to inherit from their
handlers.
2019-02-14 18:44:26 -03:00
ReinUsesLisp
25c2fe1c6b vk_resource_manager: Implement VKFence
Fences take ownership of objects, protecting them from GPU-side or
driver-side concurrent access. They must be commited from the resource
manager. Their usage flow is: commit the fence from the resource
manager, protect resources with it and use them, send the fence to an
execution queue and Wait for it if needed and then call Release. Used
resources will automatically be signaled when they are free to be
reused.
2019-02-14 18:44:26 -03:00
ReinUsesLisp
33a4cebc22 vk_resource_manager: Add VKResource interface
VKResource is an interface that gets signaled by a fence when it is free
to be reused.
2019-02-14 18:36:15 -03:00
Fernando Sahmkow
10682ad7e0 shader_decompiler: Improve Accuracy of Attribute Interpolation. 2019-02-14 03:25:07 -04:00
Fernando Sahmkow
bb41683394 rasterizer_cache_gl: Only do fast layered copy on the same format. As
glCopyImageSubData does not support different formats.
2019-02-13 16:55:00 -04:00
Lioncash
054e39647c gl_rasterizer_cache: Remove unnecessary newline 2019-02-12 16:56:19 -05:00
Lioncash
e25c464c02 gl_rasterizer_cache: Get rid of variable shadowing
Avoids shadowing the members of the struct itself, which results in a
-Wshadow warning.
2019-02-12 16:46:39 -05:00
ReinUsesLisp
e60d4d70bc gl_shader_decompiler: Re-implement TLDS lod 2019-02-12 17:03:07 -03:00
140 changed files with 3805 additions and 1260 deletions

View File

@@ -7,7 +7,7 @@ yuzu is an experimental open-source emulator for the Nintendo Switch from the cr
It is written in C++ with portability in mind, with builds actively maintained for Windows, Linux and macOS. The emulator is currently only useful for homebrew development and research purposes.
yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success, but does not implement any of the necessary GPU features to render 3D graphics.
yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success.
yuzu is licensed under the GPLv2 (or any later version). Refer to the license.txt file included.

View File

@@ -46,16 +46,18 @@ struct AudioRendererParameter {
u32_le sample_rate;
u32_le sample_count;
u32_le mix_buffer_count;
u32_le unknown_c;
u32_le submix_count;
u32_le voice_count;
u32_le sink_count;
u32_le effect_count;
u32_le unknown_1c;
u8 unknown_20;
INSERT_PADDING_BYTES(3);
u32_le performance_frame_count;
u8 is_voice_drop_enabled;
u8 unknown_21;
u8 unknown_22;
u8 execution_mode;
u32_le splitter_count;
u32_le unknown_2c;
INSERT_PADDING_WORDS(1);
u32_le num_splitter_send_channels;
u32_le unknown_30;
u32_le revision;
};
static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size");

View File

@@ -21,7 +21,7 @@ public:
Buffer(Tag tag, std::vector<s16>&& samples) : tag{tag}, samples{std::move(samples)} {}
/// Returns the raw audio data for the buffer
std::vector<s16>& Samples() {
std::vector<s16>& GetSamples() {
return samples;
}

View File

@@ -68,8 +68,8 @@ std::vector<s16> DecodeADPCM(const u8* const data, std::size_t size, const ADPCM
}
}
state.yn1 = yn1;
state.yn2 = yn2;
state.yn1 = static_cast<s16>(yn1);
state.yn2 = static_cast<s16>(yn2);
return ret;
}

View File

@@ -12,6 +12,10 @@
#include "common/ring_buffer.h"
#include "core/settings.h"
#ifdef _MSC_VER
#include <objbase.h>
#endif
namespace AudioCore {
class CubebSinkStream final : public SinkStream {
@@ -46,7 +50,7 @@ public:
}
}
~CubebSinkStream() {
~CubebSinkStream() override {
if (!ctx) {
return;
}
@@ -75,11 +79,11 @@ public:
queue.Push(samples);
}
std::size_t SamplesInQueue(u32 num_channels) const override {
std::size_t SamplesInQueue(u32 channel_count) const override {
if (!ctx)
return 0;
return queue.Size() / num_channels;
return queue.Size() / channel_count;
}
void Flush() override {
@@ -98,7 +102,7 @@ private:
u32 num_channels{};
Common::RingBuffer<s16, 0x10000> queue;
std::array<s16, 2> last_frame;
std::array<s16, 2> last_frame{};
std::atomic<bool> should_flush{};
TimeStretcher time_stretch;
@@ -108,6 +112,11 @@ private:
};
CubebSink::CubebSink(std::string_view target_device_name) {
// Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
#ifdef _MSC_VER
com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
#endif
if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) {
LOG_CRITICAL(Audio_Sink, "cubeb_init failed");
return;
@@ -142,6 +151,12 @@ CubebSink::~CubebSink() {
}
cubeb_destroy(ctx);
#ifdef _MSC_VER
if (SUCCEEDED(com_init_result)) {
CoUninitialize();
}
#endif
}
SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,

View File

@@ -25,6 +25,10 @@ private:
cubeb* ctx{};
cubeb_devid output_device{};
std::vector<SinkStreamPtr> sink_streams;
#ifdef _MSC_VER
u32 com_init_result = 0;
#endif
};
std::vector<std::string> ListCubebSinkDevices();

View File

@@ -95,7 +95,7 @@ void Stream::PlayNextBuffer() {
active_buffer = queued_buffers.front();
queued_buffers.pop();
VolumeAdjustSamples(active_buffer->Samples());
VolumeAdjustSamples(active_buffer->GetSamples());
sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());

View File

@@ -55,36 +55,36 @@ constexpr u8 Convert8To6(u8 value) {
/**
* Decode a color stored in RGBA8 format
* @param bytes Pointer to encoded source color
* @return Result color decoded as Math::Vec4<u8>
* @return Result color decoded as Common::Vec4<u8>
*/
inline Math::Vec4<u8> DecodeRGBA8(const u8* bytes) {
inline Common::Vec4<u8> DecodeRGBA8(const u8* bytes) {
return {bytes[3], bytes[2], bytes[1], bytes[0]};
}
/**
* Decode a color stored in RGB8 format
* @param bytes Pointer to encoded source color
* @return Result color decoded as Math::Vec4<u8>
* @return Result color decoded as Common::Vec4<u8>
*/
inline Math::Vec4<u8> DecodeRGB8(const u8* bytes) {
inline Common::Vec4<u8> DecodeRGB8(const u8* bytes) {
return {bytes[2], bytes[1], bytes[0], 255};
}
/**
* Decode a color stored in RG8 (aka HILO8) format
* @param bytes Pointer to encoded source color
* @return Result color decoded as Math::Vec4<u8>
* @return Result color decoded as Common::Vec4<u8>
*/
inline Math::Vec4<u8> DecodeRG8(const u8* bytes) {
inline Common::Vec4<u8> DecodeRG8(const u8* bytes) {
return {bytes[1], bytes[0], 0, 255};
}
/**
* Decode a color stored in RGB565 format
* @param bytes Pointer to encoded source color
* @return Result color decoded as Math::Vec4<u8>
* @return Result color decoded as Common::Vec4<u8>
*/
inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
inline Common::Vec4<u8> DecodeRGB565(const u8* bytes) {
u16_le pixel;
std::memcpy(&pixel, bytes, sizeof(pixel));
return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F),
@@ -94,9 +94,9 @@ inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
/**
* Decode a color stored in RGB5A1 format
* @param bytes Pointer to encoded source color
* @return Result color decoded as Math::Vec4<u8>
* @return Result color decoded as Common::Vec4<u8>
*/
inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
inline Common::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
u16_le pixel;
std::memcpy(&pixel, bytes, sizeof(pixel));
return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F),
@@ -106,9 +106,9 @@ inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
/**
* Decode a color stored in RGBA4 format
* @param bytes Pointer to encoded source color
* @return Result color decoded as Math::Vec4<u8>
* @return Result color decoded as Common::Vec4<u8>
*/
inline Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
inline Common::Vec4<u8> DecodeRGBA4(const u8* bytes) {
u16_le pixel;
std::memcpy(&pixel, bytes, sizeof(pixel));
return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF),
@@ -138,9 +138,9 @@ inline u32 DecodeD24(const u8* bytes) {
/**
* Decode a depth value and a stencil value stored in D24S8 format
* @param bytes Pointer to encoded source values
* @return Resulting values stored as a Math::Vec2
* @return Resulting values stored as a Common::Vec2
*/
inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
inline Common::Vec2<u32> DecodeD24S8(const u8* bytes) {
return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]};
}
@@ -149,7 +149,7 @@ inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
* @param color Source color to encode
* @param bytes Destination pointer to store encoded color
*/
inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
inline void EncodeRGBA8(const Common::Vec4<u8>& color, u8* bytes) {
bytes[3] = color.r();
bytes[2] = color.g();
bytes[1] = color.b();
@@ -161,7 +161,7 @@ inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
* @param color Source color to encode
* @param bytes Destination pointer to store encoded color
*/
inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
inline void EncodeRGB8(const Common::Vec4<u8>& color, u8* bytes) {
bytes[2] = color.r();
bytes[1] = color.g();
bytes[0] = color.b();
@@ -172,7 +172,7 @@ inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
* @param color Source color to encode
* @param bytes Destination pointer to store encoded color
*/
inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) {
bytes[1] = color.r();
bytes[0] = color.g();
}
@@ -181,7 +181,7 @@ inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
* @param color Source color to encode
* @param bytes Destination pointer to store encoded color
*/
inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
inline void EncodeRGB565(const Common::Vec4<u8>& color, u8* bytes) {
const u16_le data =
(Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b());
@@ -193,7 +193,7 @@ inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
* @param color Source color to encode
* @param bytes Destination pointer to store encoded color
*/
inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
inline void EncodeRGB5A1(const Common::Vec4<u8>& color, u8* bytes) {
const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) |
(Convert8To5(color.b()) << 1) | Convert8To1(color.a());
@@ -205,7 +205,7 @@ inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
* @param color Source color to encode
* @param bytes Destination pointer to store encoded color
*/
inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) {
inline void EncodeRGBA4(const Common::Vec4<u8>& color, u8* bytes) {
const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) |
(Convert8To4(color.b()) << 4) | Convert8To4(color.a());

View File

@@ -39,10 +39,10 @@ public:
Impl(Impl const&) = delete;
const Impl& operator=(Impl const&) = delete;
void PushEntry(Entry e) {
std::lock_guard<std::mutex> lock(message_mutex);
message_queue.Push(std::move(e));
message_cv.notify_one();
void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num,
const char* function, std::string message) {
message_queue.Push(
CreateEntry(log_class, log_level, filename, line_num, function, std::move(message)));
}
void AddBackend(std::unique_ptr<Backend> backend) {
@@ -86,15 +86,13 @@ private:
}
};
while (true) {
{
std::unique_lock<std::mutex> lock(message_mutex);
message_cv.wait(lock, [&] { return !running || message_queue.Pop(entry); });
}
if (!running) {
entry = message_queue.PopWait();
if (entry.final_entry) {
break;
}
write_logs(entry);
}
// Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a case
// where a system is repeatedly spamming logs even on close.
const int MAX_LOGS_TO_WRITE = filter.IsDebug() ? INT_MAX : 100;
@@ -106,18 +104,36 @@ private:
}
~Impl() {
running = false;
message_cv.notify_one();
Entry entry;
entry.final_entry = true;
message_queue.Push(entry);
backend_thread.join();
}
std::atomic_bool running{true};
std::mutex message_mutex, writing_mutex;
std::condition_variable message_cv;
Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
const char* function, std::string message) const {
using std::chrono::duration_cast;
using std::chrono::steady_clock;
Entry entry;
entry.timestamp =
duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
entry.log_class = log_class;
entry.log_level = log_level;
entry.filename = Common::TrimSourcePath(filename);
entry.line_num = line_nr;
entry.function = function;
entry.message = std::move(message);
return entry;
}
std::mutex writing_mutex;
std::thread backend_thread;
std::vector<std::unique_ptr<Backend>> backends;
Common::MPSCQueue<Log::Entry> message_queue;
Filter filter;
std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
};
void ConsoleBackend::Write(const Entry& entry) {
@@ -276,25 +292,6 @@ const char* GetLevelName(Level log_level) {
#undef LVL
}
Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
const char* function, std::string message) {
using std::chrono::duration_cast;
using std::chrono::steady_clock;
static steady_clock::time_point time_origin = steady_clock::now();
Entry entry;
entry.timestamp = duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
entry.log_class = log_class;
entry.log_level = log_level;
entry.filename = Common::TrimSourcePath(filename);
entry.line_num = line_nr;
entry.function = function;
entry.message = std::move(message);
return entry;
}
void SetGlobalFilter(const Filter& filter) {
Impl::Instance().SetGlobalFilter(filter);
}
@@ -319,9 +316,7 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
if (!filter.CheckMessage(log_class, log_level))
return;
Entry entry =
CreateEntry(log_class, log_level, filename, line_num, function, fmt::vformat(format, args));
instance.PushEntry(std::move(entry));
instance.PushEntry(log_class, log_level, filename, line_num, function,
fmt::vformat(format, args));
}
} // namespace Log

View File

@@ -27,6 +27,7 @@ struct Entry {
unsigned int line_num;
std::string function;
std::string message;
bool final_entry = false;
Entry() = default;
Entry(Entry&& o) = default;
@@ -134,10 +135,6 @@ const char* GetLogClassName(Class log_class);
*/
const char* GetLevelName(Level log_level);
/// Creates a log entry by formatting the given source location, and message.
Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
const char* function, std::string message);
/**
* The global filter will prevent any messages from even being processed if they are filtered. Each
* backend can have a filter, but if the level is lower than the global filter, the backend will

View File

@@ -7,7 +7,7 @@
#include <cstdlib>
#include <type_traits>
namespace MathUtil {
namespace Common {
constexpr float PI = 3.14159265f;
@@ -41,4 +41,4 @@ struct Rectangle {
}
};
} // namespace MathUtil
} // namespace Common

View File

@@ -6,12 +6,12 @@
#include "common/vector_math.h"
namespace Math {
namespace Common {
template <typename T>
class Quaternion {
public:
Math::Vec3<T> xyz;
Vec3<T> xyz;
T w{};
Quaternion<decltype(-T{})> Inverse() const {
@@ -38,12 +38,12 @@ public:
};
template <typename T>
auto QuaternionRotate(const Quaternion<T>& q, const Math::Vec3<T>& v) {
auto QuaternionRotate(const Quaternion<T>& q, const Vec3<T>& v) {
return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w);
}
inline Quaternion<float> MakeQuaternion(const Math::Vec3<float>& axis, float angle) {
inline Quaternion<float> MakeQuaternion(const Vec3<float>& axis, float angle) {
return {axis * std::sin(angle / 2), std::cos(angle / 2)};
}
} // namespace Math
} // namespace Common

View File

@@ -28,8 +28,8 @@
#include <cstring>
#include "common/common_types.h"
// GCC 4.6+
#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
// GCC
#ifdef __GNUC__
#if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN)
#define COMMON_LITTLE_ENDIAN 1
@@ -38,7 +38,7 @@
#endif
// LLVM/clang
#elif __clang__
#elif defined(__clang__)
#if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN)
#define COMMON_LITTLE_ENDIAN 1

View File

@@ -8,6 +8,7 @@
// single reader, single writer queue
#include <atomic>
#include <condition_variable>
#include <cstddef>
#include <mutex>
#include <utility>
@@ -45,6 +46,7 @@ public:
ElementPtr* new_ptr = new ElementPtr();
write_ptr->next.store(new_ptr, std::memory_order_release);
write_ptr = new_ptr;
cv.notify_one();
++size;
}
@@ -74,6 +76,16 @@ public:
return true;
}
T PopWait() {
if (Empty()) {
std::unique_lock<std::mutex> lock(cv_mutex);
cv.wait(lock, [this]() { return !Empty(); });
}
T t;
Pop(t);
return t;
}
// not thread-safe
void Clear() {
size.store(0);
@@ -101,6 +113,8 @@ private:
ElementPtr* write_ptr;
ElementPtr* read_ptr;
std::atomic_size_t size{0};
std::mutex cv_mutex;
std::condition_variable cv;
};
// a simple thread-safe,
@@ -135,6 +149,10 @@ public:
return spsc_queue.Pop(t);
}
T PopWait() {
return spsc_queue.PopWait();
}
// not thread-safe
void Clear() {
spsc_queue.Clear();

View File

@@ -33,7 +33,7 @@
#include <cmath>
#include <type_traits>
namespace Math {
namespace Common {
template <typename T>
class Vec2;
@@ -690,4 +690,4 @@ constexpr Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw) {
return MakeVec(x, yzw[0], yzw[1], yzw[2]);
}
} // namespace Math
} // namespace Common

View File

@@ -217,6 +217,7 @@ add_library(core STATIC
hle/service/audio/audren_u.h
hle/service/audio/codecctl.cpp
hle/service/audio/codecctl.h
hle/service/audio/errors.h
hle/service/audio/hwopus.cpp
hle/service/audio/hwopus.h
hle/service/bcat/bcat.cpp
@@ -400,6 +401,10 @@ add_library(core STATIC
hle/service/time/time.h
hle/service/usb/usb.cpp
hle/service/usb/usb.h
hle/service/vi/display/vi_display.cpp
hle/service/vi/display/vi_display.h
hle/service/vi/layer/vi_layer.cpp
hle/service/vi/layer/vi_layer.h
hle/service/vi/vi.cpp
hle/service/vi/vi.h
hle/service/vi/vi_m.cpp

View File

@@ -36,7 +36,8 @@
#include "frontend/applets/software_keyboard.h"
#include "frontend/applets/web_browser.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/gpu.h"
#include "video_core/gpu_asynch.h"
#include "video_core/gpu_synch.h"
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
@@ -78,6 +79,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
return vfs->OpenFile(path, FileSys::Mode::Read);
}
struct System::Impl {
explicit Impl(System& system) : kernel{system} {}
Cpu& CurrentCpuCore() {
return cpu_core_manager.GetCurrentCore();
@@ -95,7 +97,7 @@ struct System::Impl {
LOG_DEBUG(HW_Memory, "initialized OK");
core_timing.Initialize();
kernel.Initialize(core_timing);
kernel.Initialize();
const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::system_clock::now().time_since_epoch());
@@ -128,10 +130,16 @@ struct System::Impl {
return ResultStatus::ErrorVideoCore;
}
gpu_core = std::make_unique<Tegra::GPU>(renderer->Rasterizer());
is_powered_on = true;
if (Settings::values.use_asynchronous_gpu_emulation) {
gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer);
} else {
gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer);
}
cpu_core_manager.Initialize(system);
is_powered_on = true;
LOG_DEBUG(Core, "Initialized OK");
// Reset counters and set time origin to current frame
@@ -182,13 +190,13 @@ struct System::Impl {
void Shutdown() {
// Log last frame performance stats
auto perf_results = GetAndResetPerfStats();
Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
perf_results.emulation_speed * 100.0);
Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
perf_results.game_fps);
Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
perf_results.frametime * 1000.0);
const auto perf_results = GetAndResetPerfStats();
telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
perf_results.emulation_speed * 100.0);
telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
perf_results.game_fps);
telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
perf_results.frametime * 1000.0);
is_powered_on = false;
@@ -265,7 +273,7 @@ struct System::Impl {
Core::FrameLimiter frame_limiter;
};
System::System() : impl{std::make_unique<Impl>()} {}
System::System() : impl{std::make_unique<Impl>(*this)} {}
System::~System() = default;
Cpu& System::CurrentCpuCore() {

View File

@@ -293,10 +293,6 @@ inline ARM_Interface& CurrentArmInterface() {
return System::GetInstance().CurrentArmInterface();
}
inline TelemetrySession& Telemetry() {
return System::GetInstance().TelemetrySession();
}
inline Kernel::Process* CurrentProcess() {
return System::GetInstance().CurrentProcess();
}

View File

@@ -398,7 +398,8 @@ static bool ValidCryptoRevisionString(std::string_view base, size_t begin, size_
}
void KeyManager::LoadFromFile(const std::string& filename, bool is_title_keys) {
std::ifstream file(filename);
std::ifstream file;
OpenFStream(file, filename, std::ios_base::in);
if (!file.is_open())
return;

View File

@@ -47,7 +47,7 @@ std::size_t VectorVfsFile::Write(const u8* data_, std::size_t length, std::size_
if (offset + length > data.size())
data.resize(offset + length);
const auto write = std::min(length, data.size() - offset);
std::memcpy(data.data(), data_, write);
std::memcpy(data.data() + offset, data_, write);
return write;
}

View File

@@ -67,7 +67,7 @@ static bool IsWithinTouchscreen(const Layout::FramebufferLayout& layout, unsigne
framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right);
}
std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) {
std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) const {
new_x = std::max(new_x, framebuffer_layout.screen.left);
new_x = std::min(new_x, framebuffer_layout.screen.right - 1);

View File

@@ -166,7 +166,7 @@ private:
/**
* Clip the provided coordinates to be inside the touchscreen area.
*/
std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y);
std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y) const;
};
} // namespace Core::Frontend

View File

@@ -12,12 +12,12 @@ namespace Layout {
// Finds the largest size subrectangle contained in window area that is confined to the aspect ratio
template <class T>
static MathUtil::Rectangle<T> maxRectangle(MathUtil::Rectangle<T> window_area,
float screen_aspect_ratio) {
static Common::Rectangle<T> MaxRectangle(Common::Rectangle<T> window_area,
float screen_aspect_ratio) {
float scale = std::min(static_cast<float>(window_area.GetWidth()),
window_area.GetHeight() / screen_aspect_ratio);
return MathUtil::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
static_cast<T>(std::round(scale * screen_aspect_ratio))};
return Common::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
static_cast<T>(std::round(scale * screen_aspect_ratio))};
}
FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
@@ -29,8 +29,8 @@ FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) /
ScreenUndocked::Width};
MathUtil::Rectangle<unsigned> screen_window_area{0, 0, width, height};
MathUtil::Rectangle<unsigned> screen = maxRectangle(screen_window_area, emulation_aspect_ratio);
Common::Rectangle<unsigned> screen_window_area{0, 0, width, height};
Common::Rectangle<unsigned> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio);
float window_aspect_ratio = static_cast<float>(height) / width;

View File

@@ -16,7 +16,7 @@ struct FramebufferLayout {
unsigned width{ScreenUndocked::Width};
unsigned height{ScreenUndocked::Height};
MathUtil::Rectangle<unsigned> screen;
Common::Rectangle<unsigned> screen;
/**
* Returns the ration of pixel size of the screen, compared to the native size of the undocked

View File

@@ -124,7 +124,7 @@ using AnalogDevice = InputDevice<std::tuple<float, float>>;
* Orientation is determined by right-hand rule.
* Units: deg/sec
*/
using MotionDevice = InputDevice<std::tuple<Math::Vec3<float>, Math::Vec3<float>>>;
using MotionDevice = InputDevice<std::tuple<Common::Vec3<float>, Common::Vec3<float>>>;
/**
* A touch device is an input device that returns a tuple of two floats and a bool. The floats are

View File

@@ -4,10 +4,10 @@
#pragma once
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/swap.h"
#include "core/hle/kernel/errors.h"
#include "core/memory.h"
namespace IPC {

View File

@@ -9,6 +9,7 @@
#include "common/common_types.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/hle/kernel/address_arbiter.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/process.h"
@@ -18,32 +19,143 @@
#include "core/memory.h"
namespace Kernel {
namespace AddressArbiter {
namespace {
// Wake up num_to_wake (or all) threads in a vector.
void WakeThreads(const std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
// Only process up to 'target' threads, unless 'target' is <= 0, in which case process
// them all.
std::size_t last = waiting_threads.size();
if (num_to_wake > 0) {
last = num_to_wake;
}
// Performs actual address waiting logic.
static ResultCode WaitForAddress(VAddr address, s64 timeout) {
SharedPtr<Thread> current_thread = GetCurrentThread();
// Signal the waiting threads.
for (std::size_t i = 0; i < last; i++) {
ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb);
waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
waiting_threads[i]->SetArbiterWaitAddress(0);
waiting_threads[i]->ResumeFromWait();
}
}
} // Anonymous namespace
AddressArbiter::AddressArbiter(Core::System& system) : system{system} {}
AddressArbiter::~AddressArbiter() = default;
ResultCode AddressArbiter::SignalToAddress(VAddr address, s32 num_to_wake) {
const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
WakeThreads(waiting_threads, num_to_wake);
return RESULT_SUCCESS;
}
ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value,
s32 num_to_wake) {
// Ensure that we can write to the address.
if (!Memory::IsValidVirtualAddress(address)) {
return ERR_INVALID_ADDRESS_STATE;
}
if (static_cast<s32>(Memory::Read32(address)) != value) {
return ERR_INVALID_STATE;
}
Memory::Write32(address, static_cast<u32>(value + 1));
return SignalToAddress(address, num_to_wake);
}
ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
s32 num_to_wake) {
// Ensure that we can write to the address.
if (!Memory::IsValidVirtualAddress(address)) {
return ERR_INVALID_ADDRESS_STATE;
}
// Get threads waiting on the address.
const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
// Determine the modified value depending on the waiting count.
s32 updated_value;
if (waiting_threads.empty()) {
updated_value = value - 1;
} else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
updated_value = value + 1;
} else {
updated_value = value;
}
if (static_cast<s32>(Memory::Read32(address)) != value) {
return ERR_INVALID_STATE;
}
Memory::Write32(address, static_cast<u32>(updated_value));
WakeThreads(waiting_threads, num_to_wake);
return RESULT_SUCCESS;
}
ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
bool should_decrement) {
// Ensure that we can read the address.
if (!Memory::IsValidVirtualAddress(address)) {
return ERR_INVALID_ADDRESS_STATE;
}
const s32 cur_value = static_cast<s32>(Memory::Read32(address));
if (cur_value >= value) {
return ERR_INVALID_STATE;
}
if (should_decrement) {
Memory::Write32(address, static_cast<u32>(cur_value - 1));
}
// Short-circuit without rescheduling, if timeout is zero.
if (timeout == 0) {
return RESULT_TIMEOUT;
}
return WaitForAddress(address, timeout);
}
ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
// Ensure that we can read the address.
if (!Memory::IsValidVirtualAddress(address)) {
return ERR_INVALID_ADDRESS_STATE;
}
// Only wait for the address if equal.
if (static_cast<s32>(Memory::Read32(address)) != value) {
return ERR_INVALID_STATE;
}
// Short-circuit without rescheduling, if timeout is zero.
if (timeout == 0) {
return RESULT_TIMEOUT;
}
return WaitForAddress(address, timeout);
}
ResultCode AddressArbiter::WaitForAddress(VAddr address, s64 timeout) {
SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread();
current_thread->SetArbiterWaitAddress(address);
current_thread->SetStatus(ThreadStatus::WaitArb);
current_thread->InvalidateWakeupCallback();
current_thread->WakeAfterDelay(timeout);
Core::System::GetInstance().CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
return RESULT_TIMEOUT;
}
// Gets the threads waiting on an address.
static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) {
const auto RetrieveWaitingThreads = [](std::size_t core_index,
std::vector<SharedPtr<Thread>>& waiting_threads,
VAddr arb_addr) {
const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
std::vector<SharedPtr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) const {
const auto RetrieveWaitingThreads = [this](std::size_t core_index,
std::vector<SharedPtr<Thread>>& waiting_threads,
VAddr arb_addr) {
const auto& scheduler = system.Scheduler(core_index);
const auto& thread_list = scheduler.GetThreadList();
for (const auto& thread : thread_list) {
if (thread->GetArbiterWaitAddress() == arb_addr)
if (thread->GetArbiterWaitAddress() == arb_addr) {
waiting_threads.push_back(thread);
}
}
};
@@ -62,119 +174,4 @@ static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address)
return threads;
}
// Wake up num_to_wake (or all) threads in a vector.
static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
// Only process up to 'target' threads, unless 'target' is <= 0, in which case process
// them all.
std::size_t last = waiting_threads.size();
if (num_to_wake > 0)
last = num_to_wake;
// Signal the waiting threads.
for (std::size_t i = 0; i < last; i++) {
ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb);
waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
waiting_threads[i]->SetArbiterWaitAddress(0);
waiting_threads[i]->ResumeFromWait();
}
}
// Signals an address being waited on.
ResultCode SignalToAddress(VAddr address, s32 num_to_wake) {
std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
WakeThreads(waiting_threads, num_to_wake);
return RESULT_SUCCESS;
}
// Signals an address being waited on and increments its value if equal to the value argument.
ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake) {
// Ensure that we can write to the address.
if (!Memory::IsValidVirtualAddress(address)) {
return ERR_INVALID_ADDRESS_STATE;
}
if (static_cast<s32>(Memory::Read32(address)) == value) {
Memory::Write32(address, static_cast<u32>(value + 1));
} else {
return ERR_INVALID_STATE;
}
return SignalToAddress(address, num_to_wake);
}
// Signals an address being waited on and modifies its value based on waiting thread count if equal
// to the value argument.
ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
s32 num_to_wake) {
// Ensure that we can write to the address.
if (!Memory::IsValidVirtualAddress(address)) {
return ERR_INVALID_ADDRESS_STATE;
}
// Get threads waiting on the address.
std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
// Determine the modified value depending on the waiting count.
s32 updated_value;
if (waiting_threads.empty()) {
updated_value = value - 1;
} else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
updated_value = value + 1;
} else {
updated_value = value;
}
if (static_cast<s32>(Memory::Read32(address)) == value) {
Memory::Write32(address, static_cast<u32>(updated_value));
} else {
return ERR_INVALID_STATE;
}
WakeThreads(waiting_threads, num_to_wake);
return RESULT_SUCCESS;
}
// Waits on an address if the value passed is less than the argument value, optionally decrementing.
ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement) {
// Ensure that we can read the address.
if (!Memory::IsValidVirtualAddress(address)) {
return ERR_INVALID_ADDRESS_STATE;
}
s32 cur_value = static_cast<s32>(Memory::Read32(address));
if (cur_value < value) {
if (should_decrement) {
Memory::Write32(address, static_cast<u32>(cur_value - 1));
}
} else {
return ERR_INVALID_STATE;
}
// Short-circuit without rescheduling, if timeout is zero.
if (timeout == 0) {
return RESULT_TIMEOUT;
}
return WaitForAddress(address, timeout);
}
// Waits on an address if the value passed is equal to the argument value.
ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
// Ensure that we can read the address.
if (!Memory::IsValidVirtualAddress(address)) {
return ERR_INVALID_ADDRESS_STATE;
}
// Only wait for the address if equal.
if (static_cast<s32>(Memory::Read32(address)) != value) {
return ERR_INVALID_STATE;
}
// Short-circuit without rescheduling, if timeout is zero.
if (timeout == 0) {
return RESULT_TIMEOUT;
}
return WaitForAddress(address, timeout);
}
} // namespace AddressArbiter
} // namespace Kernel

View File

@@ -5,30 +5,68 @@
#pragma once
#include "common/common_types.h"
#include "core/hle/kernel/address_arbiter.h"
union ResultCode;
namespace Core {
class System;
}
namespace Kernel {
namespace AddressArbiter {
enum class ArbitrationType {
WaitIfLessThan = 0,
DecrementAndWaitIfLessThan = 1,
WaitIfEqual = 2,
class Thread;
class AddressArbiter {
public:
enum class ArbitrationType {
WaitIfLessThan = 0,
DecrementAndWaitIfLessThan = 1,
WaitIfEqual = 2,
};
enum class SignalType {
Signal = 0,
IncrementAndSignalIfEqual = 1,
ModifyByWaitingCountAndSignalIfEqual = 2,
};
explicit AddressArbiter(Core::System& system);
~AddressArbiter();
AddressArbiter(const AddressArbiter&) = delete;
AddressArbiter& operator=(const AddressArbiter&) = delete;
AddressArbiter(AddressArbiter&&) = default;
AddressArbiter& operator=(AddressArbiter&&) = delete;
/// Signals an address being waited on.
ResultCode SignalToAddress(VAddr address, s32 num_to_wake);
/// Signals an address being waited on and increments its value if equal to the value argument.
ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
/// Signals an address being waited on and modifies its value based on waiting thread count if
/// equal to the value argument.
ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
s32 num_to_wake);
/// Waits on an address if the value passed is less than the argument value,
/// optionally decrementing.
ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
bool should_decrement);
/// Waits on an address if the value passed is equal to the argument value.
ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
private:
// Waits on the given address with a timeout in nanoseconds
ResultCode WaitForAddress(VAddr address, s64 timeout);
// Gets the threads waiting on an address.
std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
Core::System& system;
};
enum class SignalType {
Signal = 0,
IncrementAndSignalIfEqual = 1,
ModifyByWaitingCountAndSignalIfEqual = 2,
};
ResultCode SignalToAddress(VAddr address, s32 num_to_wake);
ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement);
ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
} // namespace AddressArbiter
} // namespace Kernel

View File

@@ -14,6 +14,7 @@ constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};

View File

@@ -14,32 +14,47 @@
namespace Kernel {
namespace {
constexpr u16 GetSlot(Handle handle) {
return handle >> 15;
return static_cast<u16>(handle >> 15);
}
constexpr u16 GetGeneration(Handle handle) {
return handle & 0x7FFF;
return static_cast<u16>(handle & 0x7FFF);
}
} // Anonymous namespace
HandleTable::HandleTable() {
next_generation = 1;
Clear();
}
HandleTable::~HandleTable() = default;
ResultCode HandleTable::SetSize(s32 handle_table_size) {
if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
return ERR_OUT_OF_MEMORY;
}
// Values less than or equal to zero indicate to use the maximum allowable
// size for the handle table in the actual kernel, so we ignore the given
// value in that case, since we assume this by default unless this function
// is called.
if (handle_table_size > 0) {
table_size = static_cast<u16>(handle_table_size);
}
return RESULT_SUCCESS;
}
ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) {
DEBUG_ASSERT(obj != nullptr);
u16 slot = next_free_slot;
if (slot >= generations.size()) {
const u16 slot = next_free_slot;
if (slot >= table_size) {
LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use.");
return ERR_HANDLE_TABLE_FULL;
}
next_free_slot = generations[slot];
u16 generation = next_generation++;
const u16 generation = next_generation++;
// Overflow count so it fits in the 15 bits dedicated to the generation in the handle.
// Horizon OS uses zero to represent an invalid handle, so skip to 1.
@@ -64,10 +79,11 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
}
ResultCode HandleTable::Close(Handle handle) {
if (!IsValid(handle))
if (!IsValid(handle)) {
return ERR_INVALID_HANDLE;
}
u16 slot = GetSlot(handle);
const u16 slot = GetSlot(handle);
objects[slot] = nullptr;
@@ -77,10 +93,10 @@ ResultCode HandleTable::Close(Handle handle) {
}
bool HandleTable::IsValid(Handle handle) const {
std::size_t slot = GetSlot(handle);
u16 generation = GetGeneration(handle);
const std::size_t slot = GetSlot(handle);
const u16 generation = GetGeneration(handle);
return slot < MAX_COUNT && objects[slot] != nullptr && generations[slot] == generation;
return slot < table_size && objects[slot] != nullptr && generations[slot] == generation;
}
SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
@@ -97,7 +113,7 @@ SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
}
void HandleTable::Clear() {
for (u16 i = 0; i < MAX_COUNT; ++i) {
for (u16 i = 0; i < table_size; ++i) {
generations[i] = i + 1;
objects[i] = nullptr;
}

View File

@@ -49,6 +49,20 @@ public:
HandleTable();
~HandleTable();
/**
* Sets the number of handles that may be in use at one time
* for this handle table.
*
* @param handle_table_size The desired size to limit the handle table to.
*
* @returns an error code indicating if initialization was successful.
* If initialization was not successful, then ERR_OUT_OF_MEMORY
* will be returned.
*
* @pre handle_table_size must be within the range [0, 1024]
*/
ResultCode SetSize(s32 handle_table_size);
/**
* Allocates a handle for the given object.
* @return The created Handle or one of the following errors:
@@ -103,14 +117,21 @@ private:
*/
std::array<u16, MAX_COUNT> generations;
/**
* The limited size of the handle table. This can be specified by process
* capabilities in order to restrict the overall number of handles that
* can be created in a process instance
*/
u16 table_size = static_cast<u16>(MAX_COUNT);
/**
* Global counter of the number of created handles. Stored in `generations` when a handle is
* created, and wraps around to 1 when it hits 0x8000.
*/
u16 next_generation;
u16 next_generation = 1;
/// Head of the free slots linked list.
u16 next_free_slot;
u16 next_free_slot = 0;
};
} // namespace Kernel

View File

@@ -15,6 +15,8 @@
#include "core/hle/ipc.h"
#include "core/hle/kernel/object.h"
union ResultCode;
namespace Service {
class ServiceFrameworkBase;
}
@@ -208,14 +210,12 @@ public:
template <typename T>
SharedPtr<T> GetCopyObject(std::size_t index) {
ASSERT(index < copy_objects.size());
return DynamicObjectCast<T>(copy_objects[index]);
return DynamicObjectCast<T>(copy_objects.at(index));
}
template <typename T>
SharedPtr<T> GetMoveObject(std::size_t index) {
ASSERT(index < move_objects.size());
return DynamicObjectCast<T>(move_objects[index]);
return DynamicObjectCast<T>(move_objects.at(index));
}
void AddMoveObject(SharedPtr<Object> object) {
@@ -232,7 +232,7 @@ public:
template <typename T>
std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const {
return std::static_pointer_cast<T>(domain_request_handlers[index]);
return std::static_pointer_cast<T>(domain_request_handlers.at(index));
}
void SetDomainRequestHandlers(

View File

@@ -12,6 +12,7 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/hle/kernel/address_arbiter.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/kernel.h"
@@ -86,11 +87,13 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
}
struct KernelCore::Impl {
void Initialize(KernelCore& kernel, Core::Timing::CoreTiming& core_timing) {
explicit Impl(Core::System& system) : address_arbiter{system}, system{system} {}
void Initialize(KernelCore& kernel) {
Shutdown();
InitializeSystemResourceLimit(kernel);
InitializeThreads(core_timing);
InitializeThreads();
}
void Shutdown() {
@@ -122,9 +125,9 @@ struct KernelCore::Impl {
ASSERT(system_resource_limit->SetLimitValue(ResourceType::Sessions, 900).IsSuccess());
}
void InitializeThreads(Core::Timing::CoreTiming& core_timing) {
void InitializeThreads() {
thread_wakeup_event_type =
core_timing.RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
system.CoreTiming().RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
}
std::atomic<u32> next_object_id{0};
@@ -135,6 +138,8 @@ struct KernelCore::Impl {
std::vector<SharedPtr<Process>> process_list;
Process* current_process = nullptr;
Kernel::AddressArbiter address_arbiter;
SharedPtr<ResourceLimit> system_resource_limit;
Core::Timing::EventType* thread_wakeup_event_type = nullptr;
@@ -145,15 +150,18 @@ struct KernelCore::Impl {
/// Map of named ports managed by the kernel, which can be retrieved using
/// the ConnectToPort SVC.
NamedPortTable named_ports;
// System context
Core::System& system;
};
KernelCore::KernelCore() : impl{std::make_unique<Impl>()} {}
KernelCore::KernelCore(Core::System& system) : impl{std::make_unique<Impl>(system)} {}
KernelCore::~KernelCore() {
Shutdown();
}
void KernelCore::Initialize(Core::Timing::CoreTiming& core_timing) {
impl->Initialize(*this, core_timing);
void KernelCore::Initialize() {
impl->Initialize(*this);
}
void KernelCore::Shutdown() {
@@ -184,6 +192,14 @@ const Process* KernelCore::CurrentProcess() const {
return impl->current_process;
}
AddressArbiter& KernelCore::AddressArbiter() {
return impl->address_arbiter;
}
const AddressArbiter& KernelCore::AddressArbiter() const {
return impl->address_arbiter;
}
void KernelCore::AddNamedPort(std::string name, SharedPtr<ClientPort> port) {
impl->named_ports.emplace(std::move(name), std::move(port));
}

View File

@@ -11,6 +11,10 @@
template <typename T>
class ResultVal;
namespace Core {
class System;
}
namespace Core::Timing {
class CoreTiming;
struct EventType;
@@ -18,6 +22,7 @@ struct EventType;
namespace Kernel {
class AddressArbiter;
class ClientPort;
class HandleTable;
class Process;
@@ -30,7 +35,14 @@ private:
using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>;
public:
KernelCore();
/// Constructs an instance of the kernel using the given System
/// instance as a context for any necessary system-related state,
/// such as threads, CPU core state, etc.
///
/// @post After execution of the constructor, the provided System
/// object *must* outlive the kernel instance itself.
///
explicit KernelCore(Core::System& system);
~KernelCore();
KernelCore(const KernelCore&) = delete;
@@ -40,11 +52,7 @@ public:
KernelCore& operator=(KernelCore&&) = delete;
/// Resets the kernel to a clean slate for use.
///
/// @param core_timing CoreTiming instance used to create any necessary
/// kernel-specific callback events.
///
void Initialize(Core::Timing::CoreTiming& core_timing);
void Initialize();
/// Clears all resources in use by the kernel instance.
void Shutdown();
@@ -67,6 +75,12 @@ public:
/// Retrieves a const pointer to the current process.
const Process* CurrentProcess() const;
/// Provides a reference to the kernel's address arbiter.
Kernel::AddressArbiter& AddressArbiter();
/// Provides a const reference to the kernel's address arbiter.
const Kernel::AddressArbiter& AddressArbiter() const;
/// Adds a port to the named port table
void AddNamedPort(std::string name, SharedPtr<ClientPort> port);

View File

@@ -99,7 +99,13 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
vm_manager.Reset(metadata.GetAddressSpaceType());
const auto& caps = metadata.GetKernelCapabilities();
return capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
const auto capability_init_result =
capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
if (capability_init_result.IsError()) {
return capability_init_result;
}
return handle_table.SetSize(capabilities.GetHandleTableSize());
}
void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) {

View File

@@ -96,7 +96,7 @@ void ProcessCapabilities::InitializeForMetadatalessProcess() {
interrupt_capabilities.set();
// Allow using the maximum possible amount of handles
handle_table_size = static_cast<u32>(HandleTable::MAX_COUNT);
handle_table_size = static_cast<s32>(HandleTable::MAX_COUNT);
// Allow all debugging capabilities.
is_debuggable = true;
@@ -337,7 +337,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
return ERR_RESERVED_VALUE;
}
handle_table_size = (flags >> 16) & 0x3FF;
handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF);
return RESULT_SUCCESS;
}

View File

@@ -156,7 +156,7 @@ public:
}
/// Gets the number of total allowable handles for the process' handle table.
u32 GetHandleTableSize() const {
s32 GetHandleTableSize() const {
return handle_table_size;
}
@@ -252,7 +252,7 @@ private:
u64 core_mask = 0;
u64 priority_mask = 0;
u32 handle_table_size = 0;
s32 handle_table_size = 0;
u32 kernel_version = 0;
ProgramType program_type = ProgramType::SysModule;

View File

@@ -20,6 +20,7 @@
#include "core/hle/kernel/address_arbiter.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/client_session.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/mutex.h"
@@ -47,23 +48,6 @@ constexpr bool IsValidAddressRange(VAddr address, u64 size) {
return address + size > address;
}
// Checks if a given address range lies within a larger address range.
constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
VAddr address_range_end) {
const VAddr end_address = address + size - 1;
return address_range_begin <= address && end_address <= address_range_end - 1;
}
bool IsInsideAddressSpace(const VMManager& vm, VAddr address, u64 size) {
return IsInsideAddressRange(address, size, vm.GetAddressSpaceBaseAddress(),
vm.GetAddressSpaceEndAddress());
}
bool IsInsideNewMapRegion(const VMManager& vm, VAddr address, u64 size) {
return IsInsideAddressRange(address, size, vm.GetNewMapRegionBaseAddress(),
vm.GetNewMapRegionEndAddress());
}
// 8 GiB
constexpr u64 MAIN_MEMORY_SIZE = 0x200000000;
@@ -105,14 +89,14 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add
return ERR_INVALID_ADDRESS_STATE;
}
if (!IsInsideAddressSpace(vm_manager, src_addr, size)) {
if (!vm_manager.IsWithinAddressSpace(src_addr, size)) {
LOG_ERROR(Kernel_SVC,
"Source is not within the address space, addr=0x{:016X}, size=0x{:016X}",
src_addr, size);
return ERR_INVALID_ADDRESS_STATE;
}
if (!IsInsideNewMapRegion(vm_manager, dst_addr, size)) {
if (!vm_manager.IsWithinNewMapRegion(dst_addr, size)) {
LOG_ERROR(Kernel_SVC,
"Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}",
dst_addr, size);
@@ -238,7 +222,7 @@ static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) {
auto* const current_process = Core::CurrentProcess();
auto& vm_manager = current_process->VMManager();
if (!IsInsideAddressSpace(vm_manager, addr, size)) {
if (!vm_manager.IsWithinAddressSpace(addr, size)) {
LOG_ERROR(Kernel_SVC,
"Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
size);
@@ -299,7 +283,7 @@ static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attr
}
auto& vm_manager = Core::CurrentProcess()->VMManager();
if (!IsInsideAddressSpace(vm_manager, address, size)) {
if (!vm_manager.IsWithinAddressSpace(address, size)) {
LOG_ERROR(Kernel_SVC,
"Given address (0x{:016X}) is outside the bounds of the address space.", address);
return ERR_INVALID_ADDRESS_STATE;
@@ -1495,13 +1479,14 @@ static ResultCode WaitForAddress(VAddr address, u32 type, s32 value, s64 timeout
return ERR_INVALID_ADDRESS;
}
auto& address_arbiter = Core::System::GetInstance().Kernel().AddressArbiter();
switch (static_cast<AddressArbiter::ArbitrationType>(type)) {
case AddressArbiter::ArbitrationType::WaitIfLessThan:
return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, false);
return address_arbiter.WaitForAddressIfLessThan(address, value, timeout, false);
case AddressArbiter::ArbitrationType::DecrementAndWaitIfLessThan:
return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, true);
return address_arbiter.WaitForAddressIfLessThan(address, value, timeout, true);
case AddressArbiter::ArbitrationType::WaitIfEqual:
return AddressArbiter::WaitForAddressIfEqual(address, value, timeout);
return address_arbiter.WaitForAddressIfEqual(address, value, timeout);
default:
LOG_ERROR(Kernel_SVC,
"Invalid arbitration type, expected WaitIfLessThan, DecrementAndWaitIfLessThan "
@@ -1526,13 +1511,14 @@ static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to
return ERR_INVALID_ADDRESS;
}
auto& address_arbiter = Core::System::GetInstance().Kernel().AddressArbiter();
switch (static_cast<AddressArbiter::SignalType>(type)) {
case AddressArbiter::SignalType::Signal:
return AddressArbiter::SignalToAddress(address, num_to_wake);
return address_arbiter.SignalToAddress(address, num_to_wake);
case AddressArbiter::SignalType::IncrementAndSignalIfEqual:
return AddressArbiter::IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
return address_arbiter.IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
case AddressArbiter::SignalType::ModifyByWaitingCountAndSignalIfEqual:
return AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(address, value,
return address_arbiter.ModifyByWaitingCountAndSignalToAddressIfEqual(address, value,
num_to_wake);
default:
LOG_ERROR(Kernel_SVC,

View File

@@ -184,8 +184,6 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
return ERR_INVALID_PROCESSOR_ID;
}
// TODO(yuriks): Other checks, returning 0xD9001BEA
if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) {
LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point);
// TODO (bunnei): Find the correct error code to use here

View File

@@ -17,8 +17,8 @@
#include "core/memory_setup.h"
namespace Kernel {
static const char* GetMemoryStateName(MemoryState state) {
namespace {
const char* GetMemoryStateName(MemoryState state) {
static constexpr const char* names[] = {
"Unmapped", "Io",
"Normal", "CodeStatic",
@@ -35,6 +35,14 @@ static const char* GetMemoryStateName(MemoryState state) {
return names[ToSvcMemoryState(state)];
}
// Checks if a given address range lies within a larger address range.
constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
VAddr address_range_end) {
const VAddr end_address = address + size - 1;
return address_range_begin <= address && end_address <= address_range_end - 1;
}
} // Anonymous namespace
bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
ASSERT(base + size == next.base);
if (permissions != next.permissions || state != next.state || attribute != next.attribute ||
@@ -249,8 +257,7 @@ ResultCode VMManager::ReprotectRange(VAddr target, u64 size, VMAPermission new_p
}
ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission perms) {
if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() ||
target + size < target) {
if (!IsWithinHeapRegion(target, size)) {
return ERR_INVALID_ADDRESS;
}
@@ -285,8 +292,7 @@ ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission p
}
ResultCode VMManager::HeapFree(VAddr target, u64 size) {
if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() ||
target + size < target) {
if (!IsWithinHeapRegion(target, size)) {
return ERR_INVALID_ADDRESS;
}
@@ -706,6 +712,11 @@ u64 VMManager::GetAddressSpaceWidth() const {
return address_space_width;
}
bool VMManager::IsWithinAddressSpace(VAddr address, u64 size) const {
return IsInsideAddressRange(address, size, GetAddressSpaceBaseAddress(),
GetAddressSpaceEndAddress());
}
VAddr VMManager::GetASLRRegionBaseAddress() const {
return aslr_region_base;
}
@@ -750,6 +761,11 @@ u64 VMManager::GetCodeRegionSize() const {
return code_region_end - code_region_base;
}
bool VMManager::IsWithinCodeRegion(VAddr address, u64 size) const {
return IsInsideAddressRange(address, size, GetCodeRegionBaseAddress(),
GetCodeRegionEndAddress());
}
VAddr VMManager::GetHeapRegionBaseAddress() const {
return heap_region_base;
}
@@ -762,6 +778,11 @@ u64 VMManager::GetHeapRegionSize() const {
return heap_region_end - heap_region_base;
}
bool VMManager::IsWithinHeapRegion(VAddr address, u64 size) const {
return IsInsideAddressRange(address, size, GetHeapRegionBaseAddress(),
GetHeapRegionEndAddress());
}
VAddr VMManager::GetMapRegionBaseAddress() const {
return map_region_base;
}
@@ -774,6 +795,10 @@ u64 VMManager::GetMapRegionSize() const {
return map_region_end - map_region_base;
}
bool VMManager::IsWithinMapRegion(VAddr address, u64 size) const {
return IsInsideAddressRange(address, size, GetMapRegionBaseAddress(), GetMapRegionEndAddress());
}
VAddr VMManager::GetNewMapRegionBaseAddress() const {
return new_map_region_base;
}
@@ -786,6 +811,11 @@ u64 VMManager::GetNewMapRegionSize() const {
return new_map_region_end - new_map_region_base;
}
bool VMManager::IsWithinNewMapRegion(VAddr address, u64 size) const {
return IsInsideAddressRange(address, size, GetNewMapRegionBaseAddress(),
GetNewMapRegionEndAddress());
}
VAddr VMManager::GetTLSIORegionBaseAddress() const {
return tls_io_region_base;
}
@@ -798,4 +828,9 @@ u64 VMManager::GetTLSIORegionSize() const {
return tls_io_region_end - tls_io_region_base;
}
bool VMManager::IsWithinTLSIORegion(VAddr address, u64 size) const {
return IsInsideAddressRange(address, size, GetTLSIORegionBaseAddress(),
GetTLSIORegionEndAddress());
}
} // namespace Kernel

View File

@@ -432,18 +432,21 @@ public:
/// Gets the address space width in bits.
u64 GetAddressSpaceWidth() const;
/// Determines whether or not the given address range lies within the address space.
bool IsWithinAddressSpace(VAddr address, u64 size) const;
/// Gets the base address of the ASLR region.
VAddr GetASLRRegionBaseAddress() const;
/// Gets the end address of the ASLR region.
VAddr GetASLRRegionEndAddress() const;
/// Determines whether or not the specified address range is within the ASLR region.
bool IsWithinASLRRegion(VAddr address, u64 size) const;
/// Gets the size of the ASLR region
u64 GetASLRRegionSize() const;
/// Determines whether or not the specified address range is within the ASLR region.
bool IsWithinASLRRegion(VAddr address, u64 size) const;
/// Gets the base address of the code region.
VAddr GetCodeRegionBaseAddress() const;
@@ -453,6 +456,9 @@ public:
/// Gets the total size of the code region in bytes.
u64 GetCodeRegionSize() const;
/// Determines whether or not the specified range is within the code region.
bool IsWithinCodeRegion(VAddr address, u64 size) const;
/// Gets the base address of the heap region.
VAddr GetHeapRegionBaseAddress() const;
@@ -462,6 +468,9 @@ public:
/// Gets the total size of the heap region in bytes.
u64 GetHeapRegionSize() const;
/// Determines whether or not the specified range is within the heap region.
bool IsWithinHeapRegion(VAddr address, u64 size) const;
/// Gets the base address of the map region.
VAddr GetMapRegionBaseAddress() const;
@@ -471,6 +480,9 @@ public:
/// Gets the total size of the map region in bytes.
u64 GetMapRegionSize() const;
/// Determines whether or not the specified range is within the map region.
bool IsWithinMapRegion(VAddr address, u64 size) const;
/// Gets the base address of the new map region.
VAddr GetNewMapRegionBaseAddress() const;
@@ -480,6 +492,9 @@ public:
/// Gets the total size of the new map region in bytes.
u64 GetNewMapRegionSize() const;
/// Determines whether or not the given address range is within the new map region
bool IsWithinNewMapRegion(VAddr address, u64 size) const;
/// Gets the base address of the TLS IO region.
VAddr GetTLSIORegionBaseAddress() const;
@@ -489,6 +504,9 @@ public:
/// Gets the total size of the TLS IO region in bytes.
u64 GetTLSIORegionSize() const;
/// Determines if the given address range is within the TLS IO region.
bool IsWithinTLSIORegion(VAddr address, u64 size) const;
/// Each VMManager has its own page table, which is set as the main one when the owning process
/// is scheduled.
Memory::PageTable page_table;

View File

@@ -8,7 +8,6 @@
#include <utility>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
// All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes

View File

@@ -7,6 +7,7 @@
#include "common/string_util.h"
#include "core/core.h"
#include "core/frontend/applets/software_keyboard.h"
#include "core/hle/result.h"
#include "core/hle/service/am/am.h"
#include "core/hle/service/am/applets/software_keyboard.h"

View File

@@ -9,10 +9,13 @@
#include <vector>
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/swap.h"
#include "core/hle/service/am/am.h"
#include "core/hle/service/am/applets/applets.h"
union ResultCode;
namespace Service::AM::Applets {
enum class KeysetDisable : u32 {

View File

@@ -18,17 +18,11 @@
#include "core/hle/kernel/readable_event.h"
#include "core/hle/kernel/writable_event.h"
#include "core/hle/service/audio/audout_u.h"
#include "core/hle/service/audio/errors.h"
#include "core/memory.h"
namespace Service::Audio {
namespace ErrCodes {
enum {
ErrorUnknown = 2,
BufferCountExceeded = 8,
};
}
constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}};
constexpr int DefaultSampleRate{48000};
@@ -100,7 +94,7 @@ private:
if (stream->IsPlaying()) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::ErrorUnknown));
rb.Push(ERR_OPERATION_FAILED);
return;
}
@@ -143,7 +137,8 @@ private:
if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::BufferCountExceeded));
rb.Push(ERR_BUFFER_COUNT_EXCEEDED);
return;
}
IPC::ResponseBuilder rb{ctx, 2};

View File

@@ -17,6 +17,7 @@
#include "core/hle/kernel/readable_event.h"
#include "core/hle/kernel/writable_event.h"
#include "core/hle/service/audio/audren_u.h"
#include "core/hle/service/audio/errors.h"
namespace Service::Audio {
@@ -37,7 +38,7 @@ public:
{8, &IAudioRenderer::SetRenderingTimeLimit, "SetRenderingTimeLimit"},
{9, &IAudioRenderer::GetRenderingTimeLimit, "GetRenderingTimeLimit"},
{10, &IAudioRenderer::RequestUpdateImpl, "RequestUpdateAuto"},
{11, nullptr, "ExecuteAudioRendererRendering"},
{11, &IAudioRenderer::ExecuteAudioRendererRendering, "ExecuteAudioRendererRendering"},
};
// clang-format on
RegisterHandlers(functions);
@@ -138,6 +139,17 @@ private:
rb.Push(rendering_time_limit_percent);
}
void ExecuteAudioRendererRendering(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "called");
// This service command currently only reports an unsupported operation
// error code, or aborts. Given that, we just always return an error
// code in this case.
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ERR_NOT_SUPPORTED);
}
Kernel::EventPair system_event;
std::unique_ptr<AudioCore::AudioRenderer> renderer;
u32 rendering_time_limit_percent = 100;
@@ -235,7 +247,7 @@ AudRenU::AudRenU() : ServiceFramework("audren:u") {
{0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"},
{1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"},
{2, &AudRenU::GetAudioDeviceService, "GetAudioDeviceService"},
{3, nullptr, "OpenAudioRendererAuto"},
{3, &AudRenU::OpenAudioRendererAuto, "OpenAudioRendererAuto"},
{4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, "GetAudioDeviceServiceWithRevisionInfo"},
};
// clang-format on
@@ -248,12 +260,7 @@ AudRenU::~AudRenU() = default;
void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "called");
IPC::RequestParser rp{ctx};
auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
rb.PushIpcInterface<Audio::IAudioRenderer>(std::move(params));
OpenAudioRendererImpl(ctx);
}
void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
@@ -262,20 +269,20 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "called");
u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
buffer_sz += params.unknown_c * 1024;
buffer_sz += 0x940 * (params.unknown_c + 1);
buffer_sz += params.submix_count * 1024;
buffer_sz += 0x940 * (params.submix_count + 1);
buffer_sz += 0x3F0 * params.voice_count;
buffer_sz += Common::AlignUp(8 * (params.unknown_c + 1), 0x10);
buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10);
buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
buffer_sz +=
Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) *
(params.mix_buffer_count + 6),
0x40);
buffer_sz += Common::AlignUp(
(0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) *
(params.mix_buffer_count + 6),
0x40);
if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
u32 count = params.unknown_c + 1;
const u32 count = params.submix_count + 1;
u64 node_count = Common::AlignUp(count, 0x40);
u64 node_state_buffer_sz =
const u64 node_state_buffer_sz =
4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
u64 edge_matrix_buffer_sz = 0;
node_count = Common::AlignUp(count * count, 0x40);
@@ -289,19 +296,19 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
buffer_sz += 0xE0 * params.unknown_2c;
buffer_sz += 0xE0 * params.num_splitter_send_channels;
buffer_sz += 0x20 * params.splitter_count;
buffer_sz += Common::AlignUp(4 * params.unknown_2c, 0x10);
buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10);
}
buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
((params.voice_count * 256) | 0x40);
if (params.unknown_1c >= 1) {
if (params.performance_frame_count >= 1) {
output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
16 * params.voice_count + 16) +
0x658) *
(params.unknown_1c + 1) +
(params.performance_frame_count + 1) +
0xc0,
0x40) +
output_sz;
@@ -325,6 +332,12 @@ void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
rb.PushIpcInterface<Audio::IAudioDevice>();
}
void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "called");
OpenAudioRendererImpl(ctx);
}
void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_Audio, "(STUBBED) called");
@@ -335,6 +348,15 @@ void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& c
// based on the current revision
}
void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
rb.PushIpcInterface<IAudioRenderer>(params);
}
bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
switch (feature) {

View File

@@ -21,8 +21,11 @@ private:
void OpenAudioRenderer(Kernel::HLERequestContext& ctx);
void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx);
void GetAudioDeviceService(Kernel::HLERequestContext& ctx);
void OpenAudioRendererAuto(Kernel::HLERequestContext& ctx);
void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx);
void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx);
enum class AudioFeatures : u32 {
Splitter,
};

View File

@@ -0,0 +1,15 @@
// Copyright 2019 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "core/hle/result.h"
namespace Service::Audio {
constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::Audio, 2};
constexpr ResultCode ERR_BUFFER_COUNT_EXCEEDED{ErrorModule::Audio, 8};
constexpr ResultCode ERR_NOT_SUPPORTED{ErrorModule::Audio, 513};
} // namespace Service::Audio

View File

@@ -15,7 +15,7 @@ namespace Kernel {
class SharedMemory;
}
namespace SM {
namespace Service::SM {
class ServiceManager;
}

View File

@@ -23,7 +23,7 @@ u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector
void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
const MathUtil::Rectangle<int>& crop_rect) {
const Common::Rectangle<int>& crop_rect) {
VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
LOG_TRACE(Service,
"Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
@@ -36,7 +36,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
auto& instance = Core::System::GetInstance();
instance.GetPerfStats().EndGameFrame();
instance.Renderer().SwapBuffers(framebuffer);
instance.GPU().SwapBuffers(framebuffer);
}
} // namespace Service::Nvidia::Devices

View File

@@ -25,7 +25,7 @@ public:
/// Performs a screen flip, drawing the buffer pointed to by the handle.
void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
NVFlinger::BufferQueue::BufferTransformFlags transform,
const MathUtil::Rectangle<int>& crop_rect);
const Common::Rectangle<int>& crop_rect);
private:
std::shared_ptr<nvmap> nvmap_dev;

View File

@@ -178,7 +178,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
auto& gpu = system_instance.GPU();
auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
ASSERT(cpu_addr);
system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);

View File

@@ -136,16 +136,6 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
return 0;
}
static void PushGPUEntries(Tegra::CommandList&& entries) {
if (entries.empty()) {
return;
}
auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()};
dma_pusher.Push(std::move(entries));
dma_pusher.DispatchCalls();
}
u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
if (input.size() < sizeof(IoctlSubmitGpfifo)) {
UNIMPLEMENTED();
@@ -163,7 +153,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
params.num_entries * sizeof(Tegra::CommandListHeader));
PushGPUEntries(std::move(entries));
Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
params.fence_out.id = 0;
params.fence_out.value = 0;
@@ -184,7 +174,7 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
Memory::ReadBlock(params.address, entries.data(),
params.num_entries * sizeof(Tegra::CommandListHeader));
PushGPUEntries(std::move(entries));
Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
params.fence_out.id = 0;
params.fence_out.value = 0;

View File

@@ -63,7 +63,7 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
}
void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
const MathUtil::Rectangle<int>& crop_rect) {
const Common::Rectangle<int>& crop_rect) {
auto itr = std::find_if(queue.begin(), queue.end(),
[&](const Buffer& buffer) { return buffer.slot == slot; });
ASSERT(itr != queue.end());

View File

@@ -67,14 +67,14 @@ public:
Status status = Status::Free;
IGBPBuffer igbp_buffer;
BufferTransformFlags transform;
MathUtil::Rectangle<int> crop_rect;
Common::Rectangle<int> crop_rect;
};
void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer);
std::optional<u32> DequeueBuffer(u32 width, u32 height);
const IGBPBuffer& RequestBuffer(u32 slot) const;
void QueueBuffer(u32 slot, BufferTransformFlags transform,
const MathUtil::Rectangle<int>& crop_rect);
const Common::Rectangle<int>& crop_rect);
std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
void ReleaseBuffer(u32 slot);
u32 Query(QueryType type);

View File

@@ -14,11 +14,12 @@
#include "core/core_timing_util.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/readable_event.h"
#include "core/hle/kernel/writable_event.h"
#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
#include "core/hle/service/nvdrv/nvdrv.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
#include "core/hle/service/nvflinger/nvflinger.h"
#include "core/hle/service/vi/display/vi_display.h"
#include "core/hle/service/vi/layer/vi_layer.h"
#include "core/perf_stats.h"
#include "video_core/renderer_base.h"
@@ -28,6 +29,12 @@ constexpr std::size_t SCREEN_REFRESH_RATE = 60;
constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
displays.emplace_back(0, "Default");
displays.emplace_back(1, "External");
displays.emplace_back(2, "Edid");
displays.emplace_back(3, "Internal");
displays.emplace_back(4, "Null");
// Schedule the screen composition events
composition_event =
core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
@@ -52,13 +59,14 @@ std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
// TODO(Subv): Currently we only support the Default display.
ASSERT(name == "Default");
const auto itr = std::find_if(displays.begin(), displays.end(),
[&](const Display& display) { return display.name == name; });
const auto itr =
std::find_if(displays.begin(), displays.end(),
[&](const VI::Display& display) { return display.GetName() == name; });
if (itr == displays.end()) {
return {};
}
return itr->id;
return itr->GetID();
}
std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
@@ -68,13 +76,10 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
return {};
}
ASSERT_MSG(display->layers.empty(), "Only one layer is supported per display at the moment");
const u64 layer_id = next_layer_id++;
const u32 buffer_queue_id = next_buffer_queue_id++;
auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id);
display->layers.emplace_back(layer_id, buffer_queue);
buffer_queues.emplace_back(std::move(buffer_queue));
buffer_queues.emplace_back(buffer_queue_id, layer_id);
display->CreateLayer(layer_id, buffer_queues.back());
return layer_id;
}
@@ -85,7 +90,7 @@ std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) co
return {};
}
return layer->buffer_queue->GetId();
return layer->GetBufferQueue().GetId();
}
Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const {
@@ -95,20 +100,29 @@ Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_i
return nullptr;
}
return display->vsync_event.readable;
return display->GetVSyncEvent();
}
std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const {
BufferQueue& NVFlinger::FindBufferQueue(u32 id) {
const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
[&](const auto& queue) { return queue->GetId() == id; });
[id](const auto& queue) { return queue.GetId() == id; });
ASSERT(itr != buffer_queues.end());
return *itr;
}
Display* NVFlinger::FindDisplay(u64 display_id) {
const auto itr = std::find_if(displays.begin(), displays.end(),
[&](const Display& display) { return display.id == display_id; });
const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const {
const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
[id](const auto& queue) { return queue.GetId() == id; });
ASSERT(itr != buffer_queues.end());
return *itr;
}
VI::Display* NVFlinger::FindDisplay(u64 display_id) {
const auto itr =
std::find_if(displays.begin(), displays.end(),
[&](const VI::Display& display) { return display.GetID() == display_id; });
if (itr == displays.end()) {
return nullptr;
@@ -117,9 +131,10 @@ Display* NVFlinger::FindDisplay(u64 display_id) {
return &*itr;
}
const Display* NVFlinger::FindDisplay(u64 display_id) const {
const auto itr = std::find_if(displays.begin(), displays.end(),
[&](const Display& display) { return display.id == display_id; });
const VI::Display* NVFlinger::FindDisplay(u64 display_id) const {
const auto itr =
std::find_if(displays.begin(), displays.end(),
[&](const VI::Display& display) { return display.GetID() == display_id; });
if (itr == displays.end()) {
return nullptr;
@@ -128,57 +143,41 @@ const Display* NVFlinger::FindDisplay(u64 display_id) const {
return &*itr;
}
Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
auto* const display = FindDisplay(display_id);
if (display == nullptr) {
return nullptr;
}
const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
[&](const Layer& layer) { return layer.id == layer_id; });
if (itr == display->layers.end()) {
return nullptr;
}
return &*itr;
return display->FindLayer(layer_id);
}
const Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
const auto* const display = FindDisplay(display_id);
if (display == nullptr) {
return nullptr;
}
const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
[&](const Layer& layer) { return layer.id == layer_id; });
if (itr == display->layers.end()) {
return nullptr;
}
return &*itr;
return display->FindLayer(layer_id);
}
void NVFlinger::Compose() {
for (auto& display : displays) {
// Trigger vsync for this display at the end of drawing
SCOPE_EXIT({ display.vsync_event.writable->Signal(); });
SCOPE_EXIT({ display.SignalVSyncEvent(); });
// Don't do anything for displays without layers.
if (display.layers.empty())
if (!display.HasLayers())
continue;
// TODO(Subv): Support more than 1 layer.
ASSERT_MSG(display.layers.size() == 1, "Max 1 layer per display is supported");
Layer& layer = display.layers[0];
auto& buffer_queue = layer.buffer_queue;
VI::Layer& layer = display.GetLayer(0);
auto& buffer_queue = layer.GetBufferQueue();
// Search for a queued buffer and acquire it
auto buffer = buffer_queue->AcquireBuffer();
auto buffer = buffer_queue.AcquireBuffer();
MicroProfileFlip();
@@ -187,7 +186,7 @@ void NVFlinger::Compose() {
// There was no queued buffer to draw, render previous frame
system_instance.GetPerfStats().EndGameFrame();
system_instance.Renderer().SwapBuffers({});
system_instance.GPU().SwapBuffers({});
continue;
}
@@ -203,19 +202,8 @@ void NVFlinger::Compose() {
igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
buffer->get().transform, buffer->get().crop_rect);
buffer_queue->ReleaseBuffer(buffer->get().slot);
buffer_queue.ReleaseBuffer(buffer->get().slot);
}
}
Layer::Layer(u64 id, std::shared_ptr<BufferQueue> queue) : id(id), buffer_queue(std::move(queue)) {}
Layer::~Layer() = default;
Display::Display(u64 id, std::string name) : id(id), name(std::move(name)) {
auto& kernel = Core::System::GetInstance().Kernel();
vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
fmt::format("Display VSync Event {}", id));
}
Display::~Display() = default;
} // namespace Service::NVFlinger

View File

@@ -4,7 +4,6 @@
#pragma once
#include <array>
#include <memory>
#include <optional>
#include <string>
@@ -26,31 +25,17 @@ class WritableEvent;
namespace Service::Nvidia {
class Module;
}
} // namespace Service::Nvidia
namespace Service::VI {
class Display;
class Layer;
} // namespace Service::VI
namespace Service::NVFlinger {
class BufferQueue;
struct Layer {
Layer(u64 id, std::shared_ptr<BufferQueue> queue);
~Layer();
u64 id;
std::shared_ptr<BufferQueue> buffer_queue;
};
struct Display {
Display(u64 id, std::string name);
~Display();
u64 id;
std::string name;
std::vector<Layer> layers;
Kernel::EventPair vsync_event;
};
class NVFlinger final {
public:
explicit NVFlinger(Core::Timing::CoreTiming& core_timing);
@@ -80,7 +65,10 @@ public:
Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const;
/// Obtains a buffer queue identified by the ID.
std::shared_ptr<BufferQueue> FindBufferQueue(u32 id) const;
BufferQueue& FindBufferQueue(u32 id);
/// Obtains a buffer queue identified by the ID.
const BufferQueue& FindBufferQueue(u32 id) const;
/// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
/// finished.
@@ -88,27 +76,21 @@ public:
private:
/// Finds the display identified by the specified ID.
Display* FindDisplay(u64 display_id);
VI::Display* FindDisplay(u64 display_id);
/// Finds the display identified by the specified ID.
const Display* FindDisplay(u64 display_id) const;
const VI::Display* FindDisplay(u64 display_id) const;
/// Finds the layer identified by the specified ID in the desired display.
Layer* FindLayer(u64 display_id, u64 layer_id);
VI::Layer* FindLayer(u64 display_id, u64 layer_id);
/// Finds the layer identified by the specified ID in the desired display.
const Layer* FindLayer(u64 display_id, u64 layer_id) const;
const VI::Layer* FindLayer(u64 display_id, u64 layer_id) const;
std::shared_ptr<Nvidia::Module> nvdrv;
std::array<Display, 5> displays{{
{0, "Default"},
{1, "External"},
{2, "Edid"},
{3, "Internal"},
{4, "Null"},
}};
std::vector<std::shared_ptr<BufferQueue>> buffer_queues;
std::vector<VI::Display> displays;
std::vector<BufferQueue> buffer_queues;
/// Id to use for the next layer that is created, this counter is shared among all displays.
u64 next_layer_id = 1;

View File

@@ -0,0 +1,71 @@
// Copyright 2019 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <utility>
#include <fmt/format.h>
#include "common/assert.h"
#include "core/core.h"
#include "core/hle/kernel/readable_event.h"
#include "core/hle/service/vi/display/vi_display.h"
#include "core/hle/service/vi/layer/vi_layer.h"
namespace Service::VI {
Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} {
auto& kernel = Core::System::GetInstance().Kernel();
vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
fmt::format("Display VSync Event {}", id));
}
Display::~Display() = default;
Layer& Display::GetLayer(std::size_t index) {
return layers.at(index);
}
const Layer& Display::GetLayer(std::size_t index) const {
return layers.at(index);
}
Kernel::SharedPtr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
return vsync_event.readable;
}
void Display::SignalVSyncEvent() {
vsync_event.writable->Signal();
}
void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
// TODO(Subv): Support more than 1 layer.
ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
layers.emplace_back(id, buffer_queue);
}
Layer* Display::FindLayer(u64 id) {
const auto itr = std::find_if(layers.begin(), layers.end(),
[id](const VI::Layer& layer) { return layer.GetID() == id; });
if (itr == layers.end()) {
return nullptr;
}
return &*itr;
}
const Layer* Display::FindLayer(u64 id) const {
const auto itr = std::find_if(layers.begin(), layers.end(),
[id](const VI::Layer& layer) { return layer.GetID() == id; });
if (itr == layers.end()) {
return nullptr;
}
return &*itr;
}
} // namespace Service::VI

View File

@@ -0,0 +1,98 @@
// Copyright 2019 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string>
#include <vector>
#include "common/common_types.h"
#include "core/hle/kernel/writable_event.h"
namespace Service::NVFlinger {
class BufferQueue;
}
namespace Service::VI {
class Layer;
/// Represents a single display type
class Display {
public:
/// Constructs a display with a given unique ID and name.
///
/// @param id The unique ID for this display.
/// @param name The name for this display.
///
Display(u64 id, std::string name);
~Display();
Display(const Display&) = delete;
Display& operator=(const Display&) = delete;
Display(Display&&) = default;
Display& operator=(Display&&) = default;
/// Gets the unique ID assigned to this display.
u64 GetID() const {
return id;
}
/// Gets the name of this display
const std::string& GetName() const {
return name;
}
/// Whether or not this display has any layers added to it.
bool HasLayers() const {
return !layers.empty();
}
/// Gets a layer for this display based off an index.
Layer& GetLayer(std::size_t index);
/// Gets a layer for this display based off an index.
const Layer& GetLayer(std::size_t index) const;
/// Gets the readable vsync event.
Kernel::SharedPtr<Kernel::ReadableEvent> GetVSyncEvent() const;
/// Signals the internal vsync event.
void SignalVSyncEvent();
/// Creates and adds a layer to this display with the given ID.
///
/// @param id The ID to assign to the created layer.
/// @param buffer_queue The buffer queue for the layer instance to use.
///
void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);
/// Attempts to find a layer with the given ID.
///
/// @param id The layer ID.
///
/// @returns If found, the Layer instance with the given ID.
/// If not found, then nullptr is returned.
///
Layer* FindLayer(u64 id);
/// Attempts to find a layer with the given ID.
///
/// @param id The layer ID.
///
/// @returns If found, the Layer instance with the given ID.
/// If not found, then nullptr is returned.
///
const Layer* FindLayer(u64 id) const;
private:
u64 id;
std::string name;
std::vector<Layer> layers;
Kernel::EventPair vsync_event;
};
} // namespace Service::VI

View File

@@ -0,0 +1,13 @@
// Copyright 2019 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/hle/service/vi/layer/vi_layer.h"
namespace Service::VI {
Layer::Layer(u64 id, NVFlinger::BufferQueue& queue) : id{id}, buffer_queue{queue} {}
Layer::~Layer() = default;
} // namespace Service::VI

View File

@@ -0,0 +1,52 @@
// Copyright 2019 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
namespace Service::NVFlinger {
class BufferQueue;
}
namespace Service::VI {
/// Represents a single display layer.
class Layer {
public:
/// Constructs a layer with a given ID and buffer queue.
///
/// @param id The ID to assign to this layer.
/// @param queue The buffer queue for this layer to use.
///
Layer(u64 id, NVFlinger::BufferQueue& queue);
~Layer();
Layer(const Layer&) = delete;
Layer& operator=(const Layer&) = delete;
Layer(Layer&&) = default;
Layer& operator=(Layer&&) = delete;
/// Gets the ID for this layer.
u64 GetID() const {
return id;
}
/// Gets a reference to the buffer queue this layer is using.
NVFlinger::BufferQueue& GetBufferQueue() {
return buffer_queue;
}
/// Gets a const reference to the buffer queue this layer is using.
const NVFlinger::BufferQueue& GetBufferQueue() const {
return buffer_queue;
}
private:
u64 id;
NVFlinger::BufferQueue& buffer_queue;
};
} // namespace Service::VI

View File

@@ -420,7 +420,7 @@ public:
u32_le fence_is_valid;
std::array<Fence, 2> fences;
MathUtil::Rectangle<int> GetCropRect() const {
Common::Rectangle<int> GetCropRect() const {
return {crop_left, crop_top, crop_right, crop_bottom};
}
};
@@ -525,7 +525,7 @@ private:
LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
static_cast<u32>(transaction), flags);
auto buffer_queue = nv_flinger->FindBufferQueue(id);
auto& buffer_queue = nv_flinger->FindBufferQueue(id);
if (transaction == TransactionId::Connect) {
IGBPConnectRequestParcel request{ctx.ReadBuffer()};
@@ -538,7 +538,7 @@ private:
} else if (transaction == TransactionId::SetPreallocatedBuffer) {
IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()};
buffer_queue->SetPreallocatedBuffer(request.data.slot, request.buffer);
buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer);
IGBPSetPreallocatedBufferResponseParcel response{};
ctx.WriteBuffer(response.Serialize());
@@ -546,7 +546,7 @@ private:
IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
const u32 width{request.data.width};
const u32 height{request.data.height};
std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
if (slot) {
// Buffer is available
@@ -559,8 +559,8 @@ private:
[=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
Kernel::ThreadWakeupReason reason) {
// Repeat TransactParcel DequeueBuffer when a buffer is available
auto buffer_queue = nv_flinger->FindBufferQueue(id);
std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
auto& buffer_queue = nv_flinger->FindBufferQueue(id);
std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer.");
IGBPDequeueBufferResponseParcel response{*slot};
@@ -568,28 +568,28 @@ private:
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
},
buffer_queue->GetWritableBufferWaitEvent());
buffer_queue.GetWritableBufferWaitEvent());
}
} else if (transaction == TransactionId::RequestBuffer) {
IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};
auto& buffer = buffer_queue->RequestBuffer(request.slot);
auto& buffer = buffer_queue.RequestBuffer(request.slot);
IGBPRequestBufferResponseParcel response{buffer};
ctx.WriteBuffer(response.Serialize());
} else if (transaction == TransactionId::QueueBuffer) {
IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};
buffer_queue->QueueBuffer(request.data.slot, request.data.transform,
request.data.GetCropRect());
buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
request.data.GetCropRect());
IGBPQueueBufferResponseParcel response{1280, 720};
ctx.WriteBuffer(response.Serialize());
} else if (transaction == TransactionId::Query) {
IGBPQueryRequestParcel request{ctx.ReadBuffer()};
u32 value =
buffer_queue->Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));
const u32 value =
buffer_queue.Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));
IGBPQueryResponseParcel response{value};
ctx.WriteBuffer(response.Serialize());
@@ -629,12 +629,12 @@ private:
LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown);
const auto buffer_queue = nv_flinger->FindBufferQueue(id);
const auto& buffer_queue = nv_flinger->FindBufferQueue(id);
// TODO(Subv): Find out what this actually is.
IPC::ResponseBuilder rb{ctx, 2, 1};
rb.Push(RESULT_SUCCESS);
rb.PushCopyObjects(buffer_queue->GetBufferWaitEvent());
rb.PushCopyObjects(buffer_queue.GetBufferWaitEvent());
}
std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
@@ -752,6 +752,7 @@ public:
{1102, nullptr, "GetDisplayResolution"},
{2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"},
{2011, nullptr, "DestroyManagedLayer"},
{2012, nullptr, "CreateStrayLayer"},
{2050, nullptr, "CreateIndirectLayer"},
{2051, nullptr, "DestroyIndirectLayer"},
{2052, nullptr, "CreateIndirectProducerEndPoint"},

View File

@@ -71,15 +71,20 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa
FlushMode::FlushAndInvalidate);
VAddr end = base + size;
while (base != end) {
ASSERT_MSG(base < page_table.pointers.size(), "out of range mapping at {:016X}", base);
ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
base + page_table.pointers.size());
page_table.attributes[base] = type;
page_table.pointers[base] = memory;
std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);
base += 1;
if (memory != nullptr)
if (memory == nullptr) {
std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
} else {
while (base != end) {
page_table.pointers[base] = memory;
base += 1;
memory += PAGE_SIZE;
}
}
}
@@ -166,9 +171,6 @@ T Read(const VAddr vaddr) {
return value;
}
// The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state
std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
switch (type) {
case PageType::Unmapped:
@@ -199,9 +201,6 @@ void Write(const VAddr vaddr, const T data) {
return;
}
// The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state
std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
switch (type) {
case PageType::Unmapped:
@@ -357,16 +356,16 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
const VAddr overlap_end = std::min(end, region_end);
const VAddr overlap_size = overlap_end - overlap_start;
auto& rasterizer = system_instance.Renderer().Rasterizer();
auto& gpu = system_instance.GPU();
switch (mode) {
case FlushMode::Flush:
rasterizer.FlushRegion(overlap_start, overlap_size);
gpu.FlushRegion(overlap_start, overlap_size);
break;
case FlushMode::Invalidate:
rasterizer.InvalidateRegion(overlap_start, overlap_size);
gpu.InvalidateRegion(overlap_start, overlap_size);
break;
case FlushMode::FlushAndInvalidate:
rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size);
gpu.FlushAndInvalidateRegion(overlap_start, overlap_size);
break;
}
};

View File

@@ -393,6 +393,7 @@ struct Values {
u16 frame_limit;
bool use_disk_shader_cache;
bool use_accurate_gpu_emulation;
bool use_asynchronous_gpu_emulation;
float bg_red;
float bg_green;

View File

@@ -162,6 +162,8 @@ TelemetrySession::TelemetrySession() {
Settings::values.use_disk_shader_cache);
AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
Settings::values.use_accurate_gpu_emulation);
AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAsynchronousGpuEmulation",
Settings::values.use_asynchronous_gpu_emulation);
AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
Settings::values.use_docked_mode);
}

View File

@@ -32,12 +32,12 @@ public:
}
void BeginTilt(int x, int y) {
mouse_origin = Math::MakeVec(x, y);
mouse_origin = Common::MakeVec(x, y);
is_tilting = true;
}
void Tilt(int x, int y) {
auto mouse_move = Math::MakeVec(x, y) - mouse_origin;
auto mouse_move = Common::MakeVec(x, y) - mouse_origin;
if (is_tilting) {
std::lock_guard<std::mutex> guard(tilt_mutex);
if (mouse_move.x == 0 && mouse_move.y == 0) {
@@ -45,7 +45,7 @@ public:
} else {
tilt_direction = mouse_move.Cast<float>();
tilt_angle =
std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, MathUtil::PI * 0.5f);
std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, Common::PI * 0.5f);
}
}
}
@@ -56,7 +56,7 @@ public:
is_tilting = false;
}
std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() {
std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() {
std::lock_guard<std::mutex> guard(status_mutex);
return status;
}
@@ -66,17 +66,17 @@ private:
const std::chrono::steady_clock::duration update_duration;
const float sensitivity;
Math::Vec2<int> mouse_origin;
Common::Vec2<int> mouse_origin;
std::mutex tilt_mutex;
Math::Vec2<float> tilt_direction;
Common::Vec2<float> tilt_direction;
float tilt_angle = 0;
bool is_tilting = false;
Common::Event shutdown_event;
std::tuple<Math::Vec3<float>, Math::Vec3<float>> status;
std::tuple<Common::Vec3<float>, Common::Vec3<float>> status;
std::mutex status_mutex;
// Note: always keep the thread declaration at the end so that other objects are initialized
@@ -85,8 +85,8 @@ private:
void MotionEmuThread() {
auto update_time = std::chrono::steady_clock::now();
Math::Quaternion<float> q = MakeQuaternion(Math::Vec3<float>(), 0);
Math::Quaternion<float> old_q;
Common::Quaternion<float> q = Common::MakeQuaternion(Common::Vec3<float>(), 0);
Common::Quaternion<float> old_q;
while (!shutdown_event.WaitUntil(update_time)) {
update_time += update_duration;
@@ -96,18 +96,18 @@ private:
std::lock_guard<std::mutex> guard(tilt_mutex);
// Find the quaternion describing current 3DS tilting
q = MakeQuaternion(Math::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x),
tilt_angle);
q = Common::MakeQuaternion(
Common::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), tilt_angle);
}
auto inv_q = q.Inverse();
// Set the gravity vector in world space
auto gravity = Math::MakeVec(0.0f, -1.0f, 0.0f);
auto gravity = Common::MakeVec(0.0f, -1.0f, 0.0f);
// Find the angular rate vector in world space
auto angular_rate = ((q - old_q) * inv_q).xyz * 2;
angular_rate *= 1000 / update_millisecond / MathUtil::PI * 180;
angular_rate *= 1000 / update_millisecond / Common::PI * 180;
// Transform the two vectors from world space to 3DS space
gravity = QuaternionRotate(inv_q, gravity);
@@ -131,7 +131,7 @@ public:
device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity);
}
std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() const override {
std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const override {
return device->GetStatus();
}

View File

@@ -13,11 +13,11 @@
namespace ArmTests {
TestEnvironment::TestEnvironment(bool mutable_memory_)
: mutable_memory(mutable_memory_), test_memory(std::make_shared<TestMemory>(this)) {
: mutable_memory(mutable_memory_),
test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} {
auto process = Kernel::Process::Create(kernel, "");
kernel.MakeCurrentProcess(process.get());
page_table = &Core::CurrentProcess()->VMManager().page_table;
page_table = &process->VMManager().page_table;
std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
page_table->special_regions.clear();

View File

@@ -17,6 +17,12 @@ add_library(video_core STATIC
engines/shader_header.h
gpu.cpp
gpu.h
gpu_asynch.cpp
gpu_asynch.h
gpu_synch.cpp
gpu_synch.h
gpu_thread.cpp
gpu_thread.h
macro_interpreter.cpp
macro_interpreter.h
memory_manager.cpp
@@ -94,6 +100,8 @@ add_library(video_core STATIC
surface.h
textures/astc.cpp
textures/astc.h
textures/convert.cpp
textures/convert.h
textures/decoders.cpp
textures/decoders.h
textures/texture.h
@@ -104,8 +112,18 @@ add_library(video_core STATIC
if (ENABLE_VULKAN)
target_sources(video_core PRIVATE
renderer_vulkan/declarations.h
renderer_vulkan/vk_buffer_cache.cpp
renderer_vulkan/vk_buffer_cache.h
renderer_vulkan/vk_device.cpp
renderer_vulkan/vk_device.h)
renderer_vulkan/vk_device.h
renderer_vulkan/vk_memory_manager.cpp
renderer_vulkan/vk_memory_manager.h
renderer_vulkan/vk_resource_manager.cpp
renderer_vulkan/vk_resource_manager.h
renderer_vulkan/vk_scheduler.cpp
renderer_vulkan/vk_scheduler.h
renderer_vulkan/vk_stream_buffer.cpp
renderer_vulkan/vk_stream_buffer.h)
target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)

View File

@@ -33,18 +33,36 @@ void DmaPusher::DispatchCalls() {
}
bool DmaPusher::Step() {
if (dma_get != dma_put) {
// Push buffer non-empty, read a word
const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
ASSERT_MSG(address, "Invalid GPU address");
if (!ib_enable || dma_pushbuffer.empty()) {
// pushbuffer empty and IB empty or nonexistent - nothing to do
return false;
}
const CommandHeader command_header{Memory::Read32(*address)};
const CommandList& command_list{dma_pushbuffer.front()};
const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
GPUVAddr dma_get = command_list_header.addr;
GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
bool non_main = command_list_header.is_non_main;
dma_get += sizeof(u32);
if (dma_pushbuffer_subindex >= command_list.size()) {
// We've gone through the current list, remove it from the queue
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
}
if (!non_main) {
dma_mget = dma_get;
}
if (command_list_header.size == 0) {
return true;
}
// Push buffer non-empty, read a word
const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
ASSERT_MSG(address, "Invalid GPU address");
command_headers.resize(command_list_header.size);
Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32));
for (const CommandHeader& command_header : command_headers) {
// now, see if we're in the middle of a command
if (dma_state.length_pending) {
@@ -91,22 +109,11 @@ bool DmaPusher::Step() {
break;
}
}
} else if (ib_enable && !dma_pushbuffer.empty()) {
// Current pushbuffer empty, but we have more IB entries to read
const CommandList& command_list{dma_pushbuffer.front()};
const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
dma_get = command_list_header.addr;
dma_put = dma_get + command_list_header.size * sizeof(u32);
non_main = command_list_header.is_non_main;
}
if (dma_pushbuffer_subindex >= command_list.size()) {
// We've gone through the current list, remove it from the queue
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
}
} else {
// Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
return {};
if (!non_main) {
// TODO (degasus): This is dead code, as dma_mget is never read.
dma_mget = dma_put;
}
return true;

View File

@@ -75,6 +75,8 @@ private:
GPU& gpu;
std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once
std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer
@@ -89,11 +91,8 @@ private:
DmaState dma_state{};
bool dma_increment_once{};
GPUVAddr dma_put{}; ///< pushbuffer current end address
GPUVAddr dma_get{}; ///< pushbuffer current read address
GPUVAddr dma_mget{}; ///< main pushbuffer last read address
bool ib_enable{true}; ///< IB mode enabled
bool non_main{}; ///< non-main pushbuffer active
};
} // namespace Tegra

View File

@@ -2,12 +2,11 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/core.h"
#include "core/memory.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/math_util.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Engines {
@@ -44,10 +43,10 @@ void Fermi2D::HandleSurfaceCopy() {
const u32 src_blit_y2{
static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)};
const MathUtil::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
const MathUtil::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
regs.blit_dst_x + regs.blit_dst_width,
regs.blit_dst_y + regs.blit_dst_height};
const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
regs.blit_dst_x + regs.blit_dst_width,
regs.blit_dst_y + regs.blit_dst_height};
if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) {
UNIMPLEMENTED();

View File

@@ -5,7 +5,7 @@
#pragma once
#include <array>
#include "common/assert.h"
#include <cstddef>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"

View File

@@ -2,9 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/memory_manager.h"

View File

@@ -5,8 +5,7 @@
#pragma once
#include <array>
#include "common/assert.h"
#include "common/bit_field.h"
#include <cstddef>
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"

View File

@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/memory.h"
@@ -11,9 +12,9 @@
namespace Tegra::Engines {
KeplerMemory::KeplerMemory(VideoCore::RasterizerInterface& rasterizer,
KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
: memory_manager(memory_manager), rasterizer{rasterizer} {}
: system{system}, memory_manager(memory_manager), rasterizer{rasterizer} {}
KeplerMemory::~KeplerMemory() = default;
@@ -47,10 +48,10 @@ void KeplerMemory::ProcessData(u32 data) {
// We have to invalidate the destination region to evict any outdated surfaces from the cache.
// We do this before actually writing the new data because the destination address might contain
// a dirty surface that will have to be written back to memory.
rasterizer.InvalidateRegion(*dest_address, sizeof(u32));
Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32));
Memory::Write32(*dest_address, data);
Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
state.write_offset++;
}

View File

@@ -5,13 +5,17 @@
#pragma once
#include <array>
#include "common/assert.h"
#include <cstddef>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace Core {
class System;
}
namespace VideoCore {
class RasterizerInterface;
}
@@ -23,7 +27,8 @@ namespace Tegra::Engines {
class KeplerMemory final {
public:
KeplerMemory(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager);
~KeplerMemory();
/// Write the value to the register identified by method.
@@ -76,6 +81,7 @@ public:
} state{};
private:
Core::System& system;
MemoryManager& memory_manager;
VideoCore::RasterizerInterface& rasterizer;

View File

@@ -19,8 +19,10 @@ namespace Tegra::Engines {
/// First register id that is actually a Macro call.
constexpr u32 MacroRegistersStart = 0xE00;
Maxwell3D::Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
: memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) {
Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
: memory_manager(memory_manager), system{system}, rasterizer{rasterizer},
macro_interpreter(*this) {
InitializeRegisterDefaults();
}
@@ -103,23 +105,25 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
}
void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
auto debug_context = system.GetGPUDebugContext();
const u32 method = method_call.method;
// It is an error to write to a register other than the current macro's ARG register before it
// has finished execution.
if (executing_macro != 0) {
ASSERT(method_call.method == executing_macro + 1);
ASSERT(method == executing_macro + 1);
}
// Methods after 0xE00 are special, they're actually triggers for some microcode that was
// uploaded to the GPU during initialization.
if (method_call.method >= MacroRegistersStart) {
if (method >= MacroRegistersStart) {
// We're trying to execute a macro
if (executing_macro == 0) {
// A macro call must begin by writing the macro method's register, not its argument.
ASSERT_MSG((method_call.method % 2) == 0,
ASSERT_MSG((method % 2) == 0,
"Can't start macro execution by writing to the ARGS register");
executing_macro = method_call.method;
executing_macro = method;
}
macro_params.push_back(method_call.argument);
@@ -131,66 +135,62 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
return;
}
ASSERT_MSG(method_call.method < Regs::NUM_REGS,
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Maxwell3D register, increase the size of the Regs structure");
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
}
if (regs.reg_array[method_call.method] != method_call.argument) {
regs.reg_array[method_call.method] = method_call.argument;
if (regs.reg_array[method] != method_call.argument) {
regs.reg_array[method] = method_call.argument;
// Color buffers
constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
if (method_call.method >= first_rt_reg &&
method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt;
dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index);
if (method >= first_rt_reg &&
method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
dirty_flags.color_buffer.set(rt_index);
}
// Zeta buffer
constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) ||
method_call.method == MAXWELL3D_REG_INDEX(zeta_width) ||
method_call.method == MAXWELL3D_REG_INDEX(zeta_height) ||
(method_call.method >= MAXWELL3D_REG_INDEX(zeta) &&
method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
method == MAXWELL3D_REG_INDEX(zeta_width) ||
method == MAXWELL3D_REG_INDEX(zeta_height) ||
(method >= MAXWELL3D_REG_INDEX(zeta) &&
method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
dirty_flags.zeta_buffer = true;
}
// Shader
constexpr u32 shader_registers_count =
sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
dirty_flags.shaders = true;
}
// Vertex format
if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
method_call.method <
MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
dirty_flags.vertex_attrib_format = true;
}
// Vertex buffer
if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) &&
method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
dirty_flags.vertex_array |=
1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
} else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
dirty_flags.vertex_array |=
1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
} else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
dirty_flags.vertex_array |=
1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays));
if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
} else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
} else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
}
}
switch (method_call.method) {
switch (method) {
case MAXWELL3D_REG_INDEX(macros.data): {
ProcessMacroUpload(method_call.argument);
break;
@@ -317,7 +317,7 @@ void Maxwell3D::ProcessQueryGet() {
LongQueryResult query_result{};
query_result.value = result;
// TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
query_result.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
query_result.timestamp = system.CoreTiming().GetTicks();
Memory::WriteBlock(*address, &query_result, sizeof(query_result));
}
dirty_flags.OnMemoryWrite();
@@ -334,7 +334,7 @@ void Maxwell3D::DrawArrays() {
regs.vertex_buffer.count);
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
auto debug_context = system.GetGPUDebugContext();
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);

View File

@@ -5,8 +5,10 @@
#pragma once
#include <array>
#include <bitset>
#include <unordered_map>
#include <vector>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_funcs.h"
@@ -17,6 +19,10 @@
#include "video_core/memory_manager.h"
#include "video_core/textures/texture.h"
namespace Core {
class System;
}
namespace VideoCore {
class RasterizerInterface;
}
@@ -28,7 +34,8 @@ namespace Tegra::Engines {
class Maxwell3D final {
public:
explicit Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager);
~Maxwell3D() = default;
/// Register structure of the Maxwell3D engine.
@@ -498,7 +505,7 @@ public:
f32 translate_z;
INSERT_PADDING_WORDS(2);
MathUtil::Rectangle<s32> GetRect() const {
Common::Rectangle<s32> GetRect() const {
return {
GetX(), // left
GetY() + GetHeight(), // top
@@ -1089,19 +1096,18 @@ public:
MemoryManager& memory_manager;
struct DirtyFlags {
u8 color_buffer = 0xFF;
bool zeta_buffer = true;
bool shaders = true;
std::bitset<8> color_buffer{0xFF};
std::bitset<32> vertex_array{0xFFFFFFFF};
bool vertex_attrib_format = true;
u32 vertex_array = 0xFFFFFFFF;
bool zeta_buffer = true;
bool shaders = true;
void OnMemoryWrite() {
color_buffer = 0xFF;
zeta_buffer = true;
shaders = true;
vertex_array = 0xFFFFFFFF;
color_buffer.set();
vertex_array.set();
}
};
@@ -1131,6 +1137,8 @@ public:
private:
void InitializeRegisterDefaults();
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
/// Start offsets of each macro in macro_memory

View File

@@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/maxwell_3d.h"
@@ -11,8 +13,9 @@
namespace Tegra::Engines {
MaxwellDMA::MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
: memory_manager(memory_manager), rasterizer{rasterizer} {}
MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
: memory_manager(memory_manager), system{system}, rasterizer{rasterizer} {}
void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
ASSERT_MSG(method_call.method < Regs::NUM_REGS,
@@ -59,7 +62,7 @@ void MaxwellDMA::HandleCopy() {
}
// All copies here update the main memory, so mark all rasterizer states as invalid.
Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
// When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
@@ -89,12 +92,12 @@ void MaxwellDMA::HandleCopy() {
const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
// TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
// copying.
rasterizer.FlushRegion(*source_cpu, src_size);
Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size);
// We have to invalidate the destination region to evict any outdated surfaces from the
// cache. We do this before actually writing the new data because the destination address
// might contain a dirty surface that will have to be written back to memory.
rasterizer.InvalidateRegion(*dest_cpu, dst_size);
Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size);
};
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {

View File

@@ -5,13 +5,17 @@
#pragma once
#include <array>
#include "common/assert.h"
#include <cstddef>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace Core {
class System;
}
namespace VideoCore {
class RasterizerInterface;
}
@@ -20,7 +24,8 @@ namespace Tegra::Engines {
class MaxwellDMA final {
public:
explicit MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager);
~MaxwellDMA() = default;
/// Write the value to the register identified by method.
@@ -137,6 +142,8 @@ public:
MemoryManager& memory_manager;
private:
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
/// Performs the copy from the source buffer to the destination buffer as configured in the

View File

@@ -6,7 +6,6 @@
#include <bitset>
#include <optional>
#include <string>
#include <tuple>
#include <vector>
@@ -376,9 +375,9 @@ enum class R2pMode : u64 {
};
enum class IpaInterpMode : u64 {
Linear = 0,
Perspective = 1,
Flat = 2,
Pass = 0,
Multiply = 1,
Constant = 2,
Sc = 3,
};

View File

@@ -16,6 +16,13 @@ enum class OutputTopology : u32 {
TriangleStrip = 7,
};
enum class AttributeUse : u8 {
Unused = 0,
Constant = 1,
Perspective = 2,
ScreenLinear = 3,
};
// Documentation in:
// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
struct Header {
@@ -84,9 +91,15 @@ struct Header {
} vtg;
struct {
INSERT_PADDING_BYTES(3); // ImapSystemValuesA
INSERT_PADDING_BYTES(1); // ImapSystemValuesB
INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
INSERT_PADDING_BYTES(3); // ImapSystemValuesA
INSERT_PADDING_BYTES(1); // ImapSystemValuesB
union {
BitField<0, 2, AttributeUse> x;
BitField<2, 2, AttributeUse> y;
BitField<4, 2, AttributeUse> w;
BitField<6, 2, AttributeUse> z;
u8 raw;
} imap_generic_vector[32];
INSERT_PADDING_BYTES(2); // ImapColor
INSERT_PADDING_BYTES(2); // ImapSystemValuesC
INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
@@ -103,6 +116,28 @@ struct Header {
const u32 bit = render_target * 4 + component;
return omap.target & (1 << bit);
}
AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
return static_cast<AttributeUse>(
(imap_generic_vector[attribute].raw >> (index * 2)) & 0x03);
}
AttributeUse GetAttributeUse(u32 attribute) const {
AttributeUse result = AttributeUse::Unused;
for (u32 i = 0; i < 4; i++) {
const auto index = GetAttributeIndexUse(attribute, i);
if (index == AttributeUse::Unused) {
continue;
}
if (result == AttributeUse::Unused || result == index) {
result = index;
continue;
}
LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
if (index == AttributeUse::Perspective) {
result = index;
}
}
return result;
}
} ps;
};

View File

@@ -12,7 +12,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/gpu.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
namespace Tegra {
@@ -28,14 +28,15 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
UNREACHABLE();
}
GPU::GPU(VideoCore::RasterizerInterface& rasterizer) {
GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
auto& rasterizer{renderer.Rasterizer()};
memory_manager = std::make_unique<Tegra::MemoryManager>();
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager);
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager);
}
GPU::~GPU() = default;

View File

@@ -6,16 +6,19 @@
#include <array>
#include <memory>
#include <vector>
#include "common/common_types.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
#include "video_core/dma_pusher.h"
#include "video_core/memory_manager.h"
namespace VideoCore {
class RasterizerInterface;
namespace Core {
class System;
}
namespace VideoCore {
class RendererBase;
} // namespace VideoCore
namespace Tegra {
enum class RenderTargetFormat : u32 {
@@ -97,7 +100,7 @@ struct FramebufferConfig {
using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
TransformFlags transform_flags;
MathUtil::Rectangle<int> crop_rect;
Common::Rectangle<int> crop_rect;
};
namespace Engines {
@@ -116,9 +119,10 @@ enum class EngineID {
MAXWELL_DMA_COPY_A = 0xB0B5,
};
class GPU final {
class GPU {
public:
explicit GPU(VideoCore::RasterizerInterface& rasterizer);
explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
~GPU();
struct MethodCall {
@@ -197,8 +201,42 @@ public:
};
} regs{};
/// Push GPU command entries to be processed
virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
/// Swap buffers (render frame)
virtual void SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
virtual void FlushRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be invalidated
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
private:
void ProcessBindMethod(const MethodCall& method_call);
void ProcessSemaphoreTriggerMethod();
void ProcessSemaphoreRelease();
void ProcessSemaphoreAcquire();
/// Calls a GPU puller method.
void CallPullerMethod(const MethodCall& method_call);
/// Calls a GPU engine method.
void CallEngineMethod(const MethodCall& method_call);
/// Determines where the method should be executed.
bool ExecuteMethodOnEngine(const MethodCall& method_call);
protected:
std::unique_ptr<Tegra::DmaPusher> dma_pusher;
VideoCore::RendererBase& renderer;
private:
std::unique_ptr<Tegra::MemoryManager> memory_manager;
/// Mapping of command subchannels to their bound engine ids.
@@ -214,18 +252,6 @@ private:
std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
/// Inline memory engine
std::unique_ptr<Engines::KeplerMemory> kepler_memory;
void ProcessBindMethod(const MethodCall& method_call);
void ProcessSemaphoreTriggerMethod();
void ProcessSemaphoreRelease();
void ProcessSemaphoreAcquire();
// Calls a GPU puller method.
void CallPullerMethod(const MethodCall& method_call);
// Calls a GPU engine method.
void CallEngineMethod(const MethodCall& method_call);
// Determines where the method should be executed.
bool ExecuteMethodOnEngine(const MethodCall& method_call);
};
#define ASSERT_REG_POSITION(field_name, position) \

View File

@@ -0,0 +1,37 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/gpu_asynch.h"
#include "video_core/gpu_thread.h"
#include "video_core/renderer_base.h"
namespace VideoCommon {
GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
: Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {}
GPUAsynch::~GPUAsynch() = default;
void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
gpu_thread.SubmitList(std::move(entries));
}
void GPUAsynch::SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
gpu_thread.SwapBuffers(std::move(framebuffer));
}
void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
gpu_thread.FlushRegion(addr, size);
}
void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
gpu_thread.InvalidateRegion(addr, size);
}
void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
gpu_thread.FlushAndInvalidateRegion(addr, size);
}
} // namespace VideoCommon

View File

@@ -0,0 +1,37 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/gpu.h"
#include "video_core/gpu_thread.h"
namespace VideoCore {
class RendererBase;
} // namespace VideoCore
namespace VideoCommon {
namespace GPUThread {
class ThreadManager;
} // namespace GPUThread
/// Implementation of GPU interface that runs the GPU asynchronously
class GPUAsynch : public Tegra::GPU {
public:
explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
~GPUAsynch();
void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
private:
GPUThread::ThreadManager gpu_thread;
};
} // namespace VideoCommon

View File

@@ -0,0 +1,37 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/gpu_synch.h"
#include "video_core/renderer_base.h"
namespace VideoCommon {
GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
: Tegra::GPU(system, renderer) {}
GPUSynch::~GPUSynch() = default;
void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
dma_pusher->Push(std::move(entries));
dma_pusher->DispatchCalls();
}
void GPUSynch::SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
renderer.SwapBuffers(std::move(framebuffer));
}
void GPUSynch::FlushRegion(VAddr addr, u64 size) {
renderer.Rasterizer().FlushRegion(addr, size);
}
void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
renderer.Rasterizer().InvalidateRegion(addr, size);
}
void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
}
} // namespace VideoCommon

View File

@@ -0,0 +1,29 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/gpu.h"
namespace VideoCore {
class RendererBase;
} // namespace VideoCore
namespace VideoCommon {
/// Implementation of GPU interface that runs the GPU synchronously
class GPUSynch : public Tegra::GPU {
public:
explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
~GPUSynch();
void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
};
} // namespace VideoCommon

View File

@@ -0,0 +1,152 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/microprofile.h"
#include "core/frontend/scope_acquire_window_context.h"
#include "core/settings.h"
#include "video_core/dma_pusher.h"
#include "video_core/gpu.h"
#include "video_core/gpu_thread.h"
#include "video_core/renderer_base.h"
namespace VideoCommon::GPUThread {
/// Executes a single GPU thread command
static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer,
Tegra::DmaPusher& dma_pusher) {
if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
dma_pusher.Push(std::move(submit_list->entries));
dma_pusher.DispatchCalls();
} else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
renderer.SwapBuffers(data->framebuffer);
} else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
renderer.Rasterizer().FlushRegion(data->addr, data->size);
} else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
} else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size);
} else {
UNREACHABLE();
}
}
/// Runs the GPU thread
static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
SynchState& state) {
MicroProfileOnThreadCreate("GpuThread");
auto WaitForWakeup = [&]() {
std::unique_lock<std::mutex> lock{state.signal_mutex};
state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; });
};
// Wait for first GPU command before acquiring the window context
WaitForWakeup();
// If emulation was stopped during disk shader loading, abort before trying to acquire context
if (!state.is_running) {
return;
}
Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
while (state.is_running) {
if (!state.is_running) {
return;
}
{
// Thread has been woken up, so make the previous write queue the next read queue
std::lock_guard<std::mutex> lock{state.signal_mutex};
std::swap(state.push_queue, state.pop_queue);
}
// Execute all of the GPU commands
while (!state.pop_queue->empty()) {
ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher);
state.pop_queue->pop();
}
state.UpdateIdleState();
// Signal that the GPU thread has finished processing commands
if (state.is_idle) {
state.idle_condition.notify_one();
}
// Wait for CPU thread to send more GPU commands
WaitForWakeup();
}
}
ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
: renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
std::ref(dma_pusher), std::ref(state)},
thread_id{thread.get_id()} {}
ThreadManager::~ThreadManager() {
{
// Notify GPU thread that a shutdown is pending
std::lock_guard<std::mutex> lock{state.signal_mutex};
state.is_running = false;
}
state.signal_condition.notify_one();
thread.join();
}
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
if (entries.empty()) {
return;
}
PushCommand(SubmitListCommand(std::move(entries)), false, false);
}
void ThreadManager::SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false);
}
void ThreadManager::FlushRegion(VAddr addr, u64 size) {
// Block the CPU when using accurate emulation
PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false);
}
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
PushCommand(InvalidateRegionCommand(addr, size), true, true);
}
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
InvalidateRegion(addr, size);
}
void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {
{
std::lock_guard<std::mutex> lock{state.signal_mutex};
if ((allow_on_cpu && state.is_idle) || IsGpuThread()) {
// Execute the command synchronously on the current thread
ExecuteCommand(&command_data, renderer, dma_pusher);
return;
}
// Push the command to the GPU thread
state.UpdateIdleState();
state.push_queue->emplace(command_data);
}
// Signal the GPU thread that commands are pending
state.signal_condition.notify_one();
if (wait_for_idle) {
// Wait for the GPU to be idle (all commands to be executed)
std::unique_lock<std::mutex> lock{state.idle_mutex};
state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); });
}
}
} // namespace VideoCommon::GPUThread

136
src/video_core/gpu_thread.h Normal file
View File

@@ -0,0 +1,136 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <atomic>
#include <condition_variable>
#include <memory>
#include <mutex>
#include <optional>
#include <thread>
#include <variant>
namespace Tegra {
struct FramebufferConfig;
class DmaPusher;
} // namespace Tegra
namespace VideoCore {
class RendererBase;
} // namespace VideoCore
namespace VideoCommon::GPUThread {
/// Command to signal to the GPU thread that a command list is ready for processing
struct SubmitListCommand final {
explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
Tegra::CommandList entries;
};
/// Command to signal to the GPU thread that a swap buffers is pending
struct SwapBuffersCommand final {
explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
: framebuffer{std::move(framebuffer)} {}
std::optional<const Tegra::FramebufferConfig> framebuffer;
};
/// Command to signal to the GPU thread to flush a region
struct FlushRegionCommand final {
explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
const VAddr addr;
const u64 size;
};
/// Command to signal to the GPU thread to invalidate a region
struct InvalidateRegionCommand final {
explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
const VAddr addr;
const u64 size;
};
/// Command to signal to the GPU thread to flush and invalidate a region
struct FlushAndInvalidateRegionCommand final {
explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
: addr{addr}, size{size} {}
const VAddr addr;
const u64 size;
};
using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
/// Struct used to synchronize the GPU thread
struct SynchState final {
std::atomic<bool> is_running{true};
std::atomic<bool> is_idle{true};
std::condition_variable signal_condition;
std::mutex signal_mutex;
std::condition_variable idle_condition;
std::mutex idle_mutex;
// We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
// one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
// empty. This allows for efficient thread-safe access, as it does not require any copies.
using CommandQueue = std::queue<CommandData>;
std::array<CommandQueue, 2> command_queues;
CommandQueue* push_queue{&command_queues[0]};
CommandQueue* pop_queue{&command_queues[1]};
void UpdateIdleState() {
std::lock_guard<std::mutex> lock{idle_mutex};
is_idle = command_queues[0].empty() && command_queues[1].empty();
}
};
/// Class used to manage the GPU thread
class ThreadManager final {
public:
explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
~ThreadManager();
/// Push GPU command entries to be processed
void SubmitList(Tegra::CommandList&& entries);
/// Swap buffers (render frame)
void SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
void FlushRegion(VAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be invalidated
void InvalidateRegion(VAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(VAddr addr, u64 size);
/// Waits the caller until the GPU thread is idle, used for synchronization
void WaitForIdle();
private:
/// Pushes a command to be executed by the GPU thread
void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
/// Returns true if this is called by the GPU thread
bool IsGpuThread() const {
return std::this_thread::get_id() == thread_id;
}
private:
SynchState state;
std::thread thread;
std::thread::id thread_id;
VideoCore::RendererBase& renderer;
Tegra::DmaPusher& dma_pusher;
};
} // namespace VideoCommon::GPUThread

View File

@@ -129,6 +129,15 @@ protected:
return ++modified_ticks;
}
/// Flushes the specified object, updating appropriate cache state as needed
void FlushObject(const T& object) {
if (!object->IsDirty()) {
return;
}
object->Flush();
object->MarkAsModified(false, *this);
}
private:
/// Returns a list of cached objects from the specified memory region, ordered by access time
std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
@@ -154,15 +163,6 @@ private:
return objects;
}
/// Flushes the specified object, updating appropriate cache state as needed
void FlushObject(const T& object) {
if (!object->IsDirty()) {
return;
}
object->Flush();
object->MarkAsModified(false, *this);
}
using ObjectSet = std::set<T>;
using ObjectCache = std::unordered_map<VAddr, T>;
using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;

View File

@@ -47,8 +47,8 @@ public:
/// Attempt to use a faster method to perform a surface copy
virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const MathUtil::Rectangle<u32>& src_rect,
const MathUtil::Rectangle<u32>& dst_rect) {
const Common::Rectangle<u32>& src_rect,
const Common::Rectangle<u32>& dst_rect) {
return false;
}

View File

@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/logging/log.h"
#include "core/frontend/emu_window.h"
#include "core/settings.h"
#include "video_core/renderer_base.h"

View File

@@ -102,8 +102,8 @@ struct FramebufferCacheKey {
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
ScreenInfo& info)
: res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info},
buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
: res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, emu_window{window},
screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
// Create sampler objects
for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
@@ -200,7 +200,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
}
// Rebinding the VAO invalidates the vertex buffer bindings.
gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
gpu.dirty_flags.vertex_array.set();
state.draw.vertex_array = vao_entry.handle;
return vao_entry.handle;
@@ -210,14 +210,14 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs;
if (!gpu.dirty_flags.vertex_array)
if (gpu.dirty_flags.vertex_array.none())
return;
MICROPROFILE_SCOPE(OpenGL_VB);
// Upload all guest vertex arrays sequentially to our buffer
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
if (~gpu.dirty_flags.vertex_array & (1u << index))
if (!gpu.dirty_flags.vertex_array[index])
continue;
const auto& vertex_array = regs.vertex_array[index];
@@ -244,7 +244,7 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
}
}
gpu.dirty_flags.vertex_array = 0;
gpu.dirty_flags.vertex_array.reset();
}
DrawParameters RasterizerOpenGL::SetupDraw() {
@@ -488,13 +488,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
std::optional<std::size_t> single_color_target) {
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs;
const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
single_color_target};
if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 &&
!gpu.dirty_flags.zeta_buffer) {
if (fb_config_state == current_framebuffer_config_state &&
gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
// Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
// single color targets). This is done because the guest registers may not change but the
// host framebuffer may contain different attachments
@@ -721,10 +721,10 @@ void RasterizerOpenGL::DrawArrays() {
// Add space for at least 18 constant buffers
buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
bool invalidate = buffer_cache.Map(buffer_size);
const bool invalidate = buffer_cache.Map(buffer_size);
if (invalidate) {
// As all cached buffers are invalidated, we need to recheck their state.
gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
gpu.dirty_flags.vertex_array.set();
}
const GLuint vao = SetupVertexFormat();
@@ -738,30 +738,18 @@ void RasterizerOpenGL::DrawArrays() {
shader_program_manager->ApplyTo(state);
state.Apply();
// Execute draw call
res_cache.SignalPreDrawCall();
params.DispatchDraw();
// Disable scissor test
state.viewports[0].scissor.enabled = false;
res_cache.SignalPostDrawCall();
accelerate_draw = AccelDraw::Disabled;
// Unbind textures for potential future use as framebuffer attachments
for (auto& texture_unit : state.texture_units) {
texture_unit.Unbind();
}
state.Apply();
}
void RasterizerOpenGL::FlushAll() {}
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (Settings::values.use_accurate_gpu_emulation) {
// Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
res_cache.FlushRegion(addr, size);
}
res_cache.FlushRegion(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@@ -779,8 +767,8 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const MathUtil::Rectangle<u32>& src_rect,
const MathUtil::Rectangle<u32>& dst_rect) {
const Common::Rectangle<u32>& src_rect,
const Common::Rectangle<u32>& dst_rect) {
MICROPROFILE_SCOPE(OpenGL_Blits);
res_cache.FermiCopySurface(src, dst, src_rect, dst_rect);
return true;
@@ -1034,7 +1022,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
for (std::size_t i = 0; i < viewport_count; i++) {
auto& viewport = current_state.viewports[i];
const auto& src = regs.viewports[i];
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
viewport.x = viewport_rect.left;
viewport.y = viewport_rect.bottom;
viewport.width = viewport_rect.GetWidth();

View File

@@ -62,8 +62,8 @@ public:
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const MathUtil::Rectangle<u32>& src_rect,
const MathUtil::Rectangle<u32>& dst_rect) override;
const Common::Rectangle<u32>& src_rect,
const Common::Rectangle<u32>& dst_rect) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
bool AccelerateDrawBatch(bool is_indexed) override;

View File

@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <algorithm>
#include <optional>
#include <glad/glad.h>
#include "common/alignment.h"
@@ -20,7 +21,7 @@
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/surface.h"
#include "video_core/textures/astc.h"
#include "video_core/textures/convert.h"
#include "video_core/textures/decoders.h"
namespace OpenGL {
@@ -399,7 +400,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
return format;
}
MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
if (IsPixelFormatASTC(pixel_format)) {
// ASTC formats must stop at the ATSC block size boundary
@@ -423,7 +424,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
for (u32 i = 0; i < params.depth; i++) {
MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
params.MipBlockDepth(mip_level), params.tile_width_spacing, 1,
params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
gl_buffer.data() + offset_gl, gl_size, params.addr + offset);
offset += layer_size;
offset_gl += gl_size;
@@ -549,6 +550,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
// alternatives. This signals a bug on those functions.
const auto width = static_cast<GLsizei>(params.MipWidth(0));
const auto height = static_cast<GLsizei>(params.MipHeight(0));
memory_size = params.MemorySize();
reinterpreted = false;
const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
gl_internal_format = format_tuple.internal_format;
@@ -594,103 +597,6 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
}
}
static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) {
union S8Z24 {
BitField<0, 24, u32> z24;
BitField<24, 8, u32> s8;
};
static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
union Z24S8 {
BitField<0, 8, u32> s8;
BitField<8, 24, u32> z24;
};
static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
S8Z24 s8z24_pixel{};
Z24S8 z24s8_pixel{};
constexpr auto bpp{GetBytesPerPixel(PixelFormat::S8Z24)};
for (std::size_t y = 0; y < height; ++y) {
for (std::size_t x = 0; x < width; ++x) {
const std::size_t offset{bpp * (y * width + x)};
if (reverse) {
std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
s8z24_pixel.s8.Assign(z24s8_pixel.s8);
s8z24_pixel.z24.Assign(z24s8_pixel.z24);
std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
} else {
std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
z24s8_pixel.s8.Assign(s8z24_pixel.s8);
z24s8_pixel.z24.Assign(s8z24_pixel.z24);
std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
}
}
}
}
/**
* Helper function to perform software conversion (as needed) when loading a buffer from Switch
* memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
* typical desktop GPUs.
*/
static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
u32 width, u32 height, u32 depth) {
switch (pixel_format) {
case PixelFormat::ASTC_2D_4X4:
case PixelFormat::ASTC_2D_8X8:
case PixelFormat::ASTC_2D_8X5:
case PixelFormat::ASTC_2D_5X4:
case PixelFormat::ASTC_2D_5X5:
case PixelFormat::ASTC_2D_4X4_SRGB:
case PixelFormat::ASTC_2D_8X8_SRGB:
case PixelFormat::ASTC_2D_8X5_SRGB:
case PixelFormat::ASTC_2D_5X4_SRGB:
case PixelFormat::ASTC_2D_5X5_SRGB:
case PixelFormat::ASTC_2D_10X8:
case PixelFormat::ASTC_2D_10X8_SRGB: {
// Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
u32 block_width{};
u32 block_height{};
std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
data =
Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
break;
}
case PixelFormat::S8Z24:
// Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
ConvertS8Z24ToZ24S8(data, width, height, false);
break;
}
}
/**
* Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to
* Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or
* with typical desktop GPUs.
*/
static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
u32 width, u32 height) {
switch (pixel_format) {
case PixelFormat::ASTC_2D_4X4:
case PixelFormat::ASTC_2D_8X8:
case PixelFormat::ASTC_2D_4X4_SRGB:
case PixelFormat::ASTC_2D_8X8_SRGB:
case PixelFormat::ASTC_2D_5X5:
case PixelFormat::ASTC_2D_5X5_SRGB:
case PixelFormat::ASTC_2D_10X8:
case PixelFormat::ASTC_2D_10X8_SRGB: {
LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
static_cast<u32>(pixel_format));
UNREACHABLE();
break;
}
case PixelFormat::S8Z24:
// Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24.
ConvertS8Z24ToZ24S8(data, width, height, true);
break;
}
}
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
void CachedSurface::LoadGLBuffer() {
MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
@@ -719,8 +625,16 @@ void CachedSurface::LoadGLBuffer() {
}
}
for (u32 i = 0; i < params.max_mip_level; i++) {
ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i),
params.MipHeight(i), params.MipDepth(i));
const u32 width = params.MipWidth(i);
const u32 height = params.MipHeight(i);
const u32 depth = params.MipDepth(i);
if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) {
// Reserve size for RGBA8 conversion
constexpr std::size_t rgba_bpp = 4;
gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp));
}
Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width,
height, depth, true, true);
}
}
@@ -743,8 +657,8 @@ void CachedSurface::FlushGLBuffer() {
glGetTextureImage(texture.handle, 0, tuple.format, tuple.type,
static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data());
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width,
params.height);
Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
params.height, params.depth, true, true);
const u8* const texture_src_data = Memory::GetPointer(params.addr);
ASSERT(texture_src_data);
if (params.is_tiled) {
@@ -962,30 +876,31 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
const auto& regs{gpu.regs};
if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) {
return last_color_buffers[index];
if (!gpu.dirty_flags.color_buffer[index]) {
return current_color_buffers[index];
}
gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index));
gpu.dirty_flags.color_buffer.reset(index);
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
if (index >= regs.rt_control.count) {
return last_color_buffers[index] = {};
return current_color_buffers[index] = {};
}
if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
return last_color_buffers[index] = {};
return current_color_buffers[index] = {};
}
const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
return last_color_buffers[index] = GetSurface(color_params, preserve_contents);
return current_color_buffers[index] = GetSurface(color_params, preserve_contents);
}
void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
surface->LoadGLBuffer();
surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
surface->MarkAsModified(false, *this);
surface->MarkForReload(false);
}
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
@@ -997,18 +912,23 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
Surface surface{TryGet(params.addr)};
if (surface) {
if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
// Use the cached surface as-is
// Use the cached surface as-is unless it's not synced with memory
if (surface->MustReload())
LoadSurface(surface);
return surface;
} else if (preserve_contents) {
// If surface parameters changed and we care about keeping the previous data, recreate
// the surface from the old one
Surface new_surface{RecreateSurface(surface, params)};
Unregister(surface);
UnregisterSurface(surface);
Register(new_surface);
if (new_surface->IsUploaded()) {
RegisterReinterpretSurface(new_surface);
}
return new_surface;
} else {
// Delete the old surface before creating a new one to prevent collisions.
Unregister(surface);
UnregisterSurface(surface);
}
}
@@ -1062,8 +982,8 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
}
static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
const MathUtil::Rectangle<u32>& src_rect,
const MathUtil::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
const Common::Rectangle<u32>& src_rect,
const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0,
std::size_t cubemap_face = 0) {
@@ -1193,7 +1113,7 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
void RasterizerCacheOpenGL::FermiCopySurface(
const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
const MathUtil::Rectangle<u32>& src_rect, const MathUtil::Rectangle<u32>& dst_rect) {
const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) {
const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
@@ -1257,7 +1177,11 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
case SurfaceTarget::TextureCubemap:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
FastLayeredCopySurface(old_surface, new_surface);
if (old_params.pixel_format == new_params.pixel_format)
FastLayeredCopySurface(old_surface, new_surface);
else {
AccurateCopySurface(old_surface, new_surface);
}
break;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
@@ -1286,4 +1210,107 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params
return {};
}
static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params,
u32 height) {
for (u32 i = 0; i < params.max_mip_level; i++) {
if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) {
return {i};
}
}
return {};
}
static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) {
const std::size_t size = params.LayerMemorySize();
VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap);
for (u32 i = 0; i < params.depth; i++) {
if (start == addr) {
return {i};
}
start += size;
}
return {};
}
static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface,
const Surface blitted_surface) {
const auto& dst_params = blitted_surface->GetSurfaceParams();
const auto& src_params = render_surface->GetSurfaceParams();
const std::size_t src_memory_size = src_params.size_in_bytes;
const std::optional<u32> level =
TryFindBestMipMap(src_memory_size, dst_params, src_params.height);
if (level.has_value()) {
if (src_params.width == dst_params.MipWidthGobAligned(*level) &&
src_params.height == dst_params.MipHeight(*level) &&
src_params.block_height >= dst_params.MipBlockHeight(*level)) {
const std::optional<u32> slot =
TryFindBestLayer(render_surface->GetAddr(), dst_params, *level);
if (slot.has_value()) {
glCopyImageSubData(render_surface->Texture().handle,
SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
blitted_surface->Texture().handle,
SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot,
dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1);
blitted_surface->MarkAsModified(true, cache);
return true;
}
}
}
return false;
}
static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize();
const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize();
if (bound2 > bound1)
return true;
const auto& dst_params = blitted_surface->GetSurfaceParams();
const auto& src_params = render_surface->GetSurfaceParams();
return (dst_params.component_type != src_params.component_type);
}
static bool IsReinterpretInvalidSecond(const Surface render_surface,
const Surface blitted_surface) {
const auto& dst_params = blitted_surface->GetSurfaceParams();
const auto& src_params = render_surface->GetSurfaceParams();
return (dst_params.height > src_params.height && dst_params.width > src_params.width);
}
bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
Surface intersect) {
if (IsReinterpretInvalid(triggering_surface, intersect)) {
UnregisterSurface(intersect);
return false;
}
if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
UnregisterSurface(intersect);
return false;
}
FlushObject(intersect);
FlushObject(triggering_surface);
intersect->MarkForReload(true);
}
return true;
}
void RasterizerCacheOpenGL::SignalPreDrawCall() {
if (texception && GLAD_GL_ARB_texture_barrier) {
glTextureBarrier();
}
texception = false;
}
void RasterizerCacheOpenGL::SignalPostDrawCall() {
for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
if (current_color_buffers[i] != nullptr) {
Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr());
if (intersect != nullptr) {
PartialReinterpretSurface(current_color_buffers[i], intersect);
texception = true;
}
}
}
}
} // namespace OpenGL

View File

@@ -28,15 +28,15 @@ namespace OpenGL {
class CachedSurface;
using Surface = std::shared_ptr<CachedSurface>;
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
using SurfaceType = VideoCore::Surface::SurfaceType;
using PixelFormat = VideoCore::Surface::PixelFormat;
using ComponentType = VideoCore::Surface::ComponentType;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct SurfaceParams {
enum class SurfaceClass {
Uploaded,
RenderTarget,
@@ -72,7 +72,7 @@ struct SurfaceParams {
}
/// Returns the rectangle corresponding to this surface
MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const;
Common::Rectangle<u32> GetRect(u32 mip_level = 0) const;
/// Returns the total size of this surface in bytes, adjusted for compression
std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
@@ -141,10 +141,18 @@ struct SurfaceParams {
return offset;
}
std::size_t GetMipmapSingleSize(u32 mip_level) const {
return InnerMipmapMemorySize(mip_level, false, is_layered);
}
u32 MipWidth(u32 mip_level) const {
return std::max(1U, width >> mip_level);
}
u32 MipWidthGobAligned(u32 mip_level) const {
return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp());
}
u32 MipHeight(u32 mip_level) const {
return std::max(1U, height >> mip_level);
}
@@ -169,20 +177,27 @@ struct SurfaceParams {
}
u32 MipBlockDepth(u32 mip_level) const {
if (mip_level == 0)
if (mip_level == 0) {
return block_depth;
if (is_layered)
}
if (is_layered) {
return 1;
u32 depth = MipDepth(mip_level);
}
const u32 mip_depth = MipDepth(mip_level);
u32 bd = 32;
while (bd > 1 && depth * 2 <= bd) {
while (bd > 1 && mip_depth * 2 <= bd) {
bd >>= 1;
}
if (bd == 32) {
u32 bh = MipBlockHeight(mip_level);
if (bh >= 4)
const u32 bh = MipBlockHeight(mip_level);
if (bh >= 4) {
return 16;
}
}
return bd;
}
@@ -340,6 +355,10 @@ public:
return cached_size_in_bytes;
}
std::size_t GetMemorySize() const {
return memory_size;
}
void Flush() override {
FlushGLBuffer();
}
@@ -389,6 +408,26 @@ public:
Tegra::Texture::SwizzleSource swizzle_z,
Tegra::Texture::SwizzleSource swizzle_w);
void MarkReinterpreted() {
reinterpreted = true;
}
bool IsReinterpreted() const {
return reinterpreted;
}
void MarkForReload(bool reload) {
must_reload = reload;
}
bool MustReload() const {
return must_reload;
}
bool IsUploaded() const {
return params.identity == SurfaceParams::SurfaceClass::Uploaded;
}
private:
void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
@@ -402,6 +441,9 @@ private:
GLenum gl_internal_format{};
std::size_t cached_size_in_bytes{};
std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
std::size_t memory_size;
bool reinterpreted = false;
bool must_reload = false;
};
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -424,8 +466,11 @@ public:
/// Copies the contents of one surface to another
void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
const MathUtil::Rectangle<u32>& src_rect,
const MathUtil::Rectangle<u32>& dst_rect);
const Common::Rectangle<u32>& src_rect,
const Common::Rectangle<u32>& dst_rect);
void SignalPreDrawCall();
void SignalPostDrawCall();
private:
void LoadSurface(const Surface& surface);
@@ -443,6 +488,10 @@ private:
/// Tries to get a reserved surface for the specified parameters
Surface TryGetReservedSurface(const SurfaceParams& params);
// Partialy reinterpret a surface based on a triggering_surface that collides with it.
// returns true if the reinterpret was successful, false in case it was not.
bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);
/// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
@@ -459,12 +508,50 @@ private:
OGLFramebuffer read_framebuffer;
OGLFramebuffer draw_framebuffer;
bool texception = false;
/// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
/// using the new format.
OGLBuffer copy_pbo;
std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers;
std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
Surface last_depth_buffer;
using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>;
using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
static auto GetReinterpretInterval(const Surface& object) {
return SurfaceInterval::right_open(object->GetAddr() + 1,
object->GetAddr() + object->GetMemorySize() - 1);
}
// Reinterpreted surfaces are very fragil as the game may keep rendering into them.
SurfaceIntervalCache reinterpreted_surfaces;
void RegisterReinterpretSurface(Surface reinterpret_surface) {
auto interval = GetReinterpretInterval(reinterpret_surface);
reinterpreted_surfaces.insert({interval, reinterpret_surface});
reinterpret_surface->MarkReinterpreted();
}
Surface CollideOnReinterpretedSurface(VAddr addr) const {
const SurfaceInterval interval{addr};
for (auto& pair :
boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
return pair.second;
}
return nullptr;
}
/// Unregisters an object from the cache
void UnregisterSurface(const Surface& object) {
if (object->IsReinterpreted()) {
auto interval = GetReinterpretInterval(object);
reinterpreted_surfaces.erase(interval);
}
Unregister(object);
}
};
} // namespace OpenGL

View File

@@ -20,6 +20,7 @@
namespace OpenGL::GLShader {
using Tegra::Shader::Attribute;
using Tegra::Shader::AttributeUse;
using Tegra::Shader::Header;
using Tegra::Shader::IpaInterpMode;
using Tegra::Shader::IpaMode;
@@ -288,34 +289,22 @@ private:
code.AddNewLine();
}
std::string GetInputFlags(const IpaMode& input_mode) {
const IpaSampleMode sample_mode = input_mode.sampling_mode;
const IpaInterpMode interp_mode = input_mode.interpolation_mode;
std::string GetInputFlags(AttributeUse attribute) {
std::string out;
switch (interp_mode) {
case IpaInterpMode::Flat:
switch (attribute) {
case AttributeUse::Constant:
out += "flat ";
break;
case IpaInterpMode::Linear:
case AttributeUse::ScreenLinear:
out += "noperspective ";
break;
case IpaInterpMode::Perspective:
case AttributeUse::Perspective:
// Default, Smooth
break;
default:
UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode));
}
switch (sample_mode) {
case IpaSampleMode::Centroid:
// It can be implemented with the "centroid " keyword in GLSL
UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid");
break;
case IpaSampleMode::Default:
// Default, n/a
break;
default:
UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode));
LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
UNREACHABLE();
}
return out;
}
@@ -324,16 +313,11 @@ private:
const auto& attributes = ir.GetInputAttributes();
for (const auto element : attributes) {
const Attribute::Index index = element.first;
const IpaMode& input_mode = *element.second.begin();
if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
// Skip when it's not a generic attribute
continue;
}
ASSERT(element.second.size() > 0);
UNIMPLEMENTED_IF_MSG(element.second.size() > 1,
"Multiple input flag modes are not supported in GLSL");
// TODO(bunnei): Use proper number of elements for these
u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
if (stage != ShaderStage::Vertex) {
@@ -345,8 +329,14 @@ private:
if (stage == ShaderStage::Geometry) {
attr = "gs_" + attr + "[]";
}
code.AddLine("layout (location = " + std::to_string(idx) + ") " +
GetInputFlags(input_mode) + "in vec4 " + attr + ';');
std::string suffix;
if (stage == ShaderStage::Fragment) {
const auto input_mode =
header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION);
suffix = GetInputFlags(input_mode);
}
code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " +
attr + ';');
}
if (!attributes.empty())
code.AddNewLine();
@@ -616,17 +606,8 @@ private:
std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
std::string value = VisitOperand(operation, operand_index);
switch (type) {
case Type::Bool:
case Type::Bool2:
case Type::Float:
return value;
case Type::Int:
return "ftoi(" + value + ')';
case Type::Uint:
return "ftou(" + value + ')';
case Type::HalfFloat:
case Type::HalfFloat: {
const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta());
if (!half_meta) {
value = "toHalf2(" + value + ')';
@@ -643,6 +624,26 @@ private:
return "vec2(toHalf2(" + value + ")[1])";
}
}
default:
return CastOperand(value, type);
}
}
std::string CastOperand(const std::string& value, Type type) const {
switch (type) {
case Type::Bool:
case Type::Bool2:
case Type::Float:
return value;
case Type::Int:
return "ftoi(" + value + ')';
case Type::Uint:
return "ftou(" + value + ')';
case Type::HalfFloat:
// Can't be handled as a stand-alone value
UNREACHABLE();
return value;
}
UNREACHABLE();
return value;
}
@@ -650,6 +651,7 @@ private:
std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) {
switch (type) {
case Type::Bool:
case Type::Bool2:
case Type::Float:
if (needs_parenthesis) {
return '(' + value + ')';
@@ -721,7 +723,7 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
const auto count = static_cast<u32>(operation.GetOperandsCount());
const std::size_t count = operation.GetOperandsCount();
const bool has_array = meta->sampler.IsArray();
const bool has_shadow = meta->sampler.IsShadow();
@@ -732,10 +734,10 @@ private:
expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
expr += '(';
for (u32 i = 0; i < count; ++i) {
for (std::size_t i = 0; i < count; ++i) {
expr += Visit(operation[i]);
const u32 next = i + 1;
const std::size_t next = i + 1;
if (next < count || has_array || has_shadow)
expr += ", ";
}
@@ -1206,25 +1208,26 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
UNIMPLEMENTED_IF(meta->sampler.IsArray());
UNIMPLEMENTED_IF(!meta->extras.empty());
const auto count = static_cast<u32>(operation.GetOperandsCount());
const std::size_t count = operation.GetOperandsCount();
std::string expr = "texelFetch(";
expr += GetSampler(meta->sampler);
expr += ", ";
expr += constructors.at(count - 1);
expr += constructors.at(operation.GetOperandsCount() - 1);
expr += '(';
for (u32 i = 0; i < count; ++i) {
for (std::size_t i = 0; i < count; ++i) {
expr += VisitOperand(operation, i, Type::Int);
const u32 next = i + 1;
const std::size_t next = i + 1;
if (next == count)
expr += ')';
if (next < count)
else if (next < count)
expr += ", ";
}
for (std::size_t i = 0; i < meta->extras.size(); ++i) {
expr += ", ";
expr += CastOperand(Visit(meta->extras.at(i)), Type::Int);
}
expr += ')';
return expr + GetSwizzle(meta->element);
@@ -1571,4 +1574,4 @@ ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const st
return {decompiler.GetResult(), decompiler.GetShaderEntries()};
}
} // namespace OpenGL::GLShader
} // namespace OpenGL::GLShader

Some files were not shown because too many files have changed in this diff Show More