Compare commits

..

100 Commits

Author SHA1 Message Date
zarroboogs
be6466d5c0 added a toggle to force 30fps mode 2019-04-09 02:14:03 +03:00
bunnei
f14328bf0a Merge pull request #2300 from FernandoS27/null-shader
shader_cache: Permit a Null Shader in case of a bad host_ptr.
2019-04-07 17:58:27 -04:00
bunnei
c2fee0e519 Merge pull request #2355 from ReinUsesLisp/sync-point
maxwell_3d: Reduce severity of ProcessSyncPoint
2019-04-07 17:56:11 -04:00
bunnei
06ece52cfe Merge pull request #2359 from FearlessTobi/port-2-prs
Port citra-emu/citra#4718: "fix clang-format target when using a path with spaces on windows"
2019-04-07 17:54:57 -04:00
bunnei
8aaf418bd6 Merge pull request #2306 from ReinUsesLisp/aoffi
shader_ir: Implement AOFFI for TEX and TLD4
2019-04-07 17:52:30 -04:00
bunnei
3c1ce290d0 Merge pull request #2361 from lioncash/pagetable
core/memory: Minor simplifications to page table management
2019-04-07 17:50:31 -04:00
bunnei
6b18a1592f Merge pull request #2321 from ReinUsesLisp/gl-state-rework
gl_state: Rework to enable individual applies
2019-04-07 17:50:07 -04:00
bunnei
21a4e7deea Merge pull request #2098 from FreddyFunk/disk-cache-zstd
gl_shader_disk_cache: Use Zstandard for compression
2019-04-07 17:48:33 -04:00
bunnei
52ad5fa0e8 Merge pull request #2356 from lioncash/pair
kernel/{server_port, server_session}: Return pairs instead of tuples from pair creation functions
2019-04-07 17:48:00 -04:00
bunnei
d9b1c24f4f Merge pull request #2362 from lioncash/enum
core/memory: Remove unused enum constants
2019-04-07 17:46:09 -04:00
bunnei
80162888e6 Merge pull request #2352 from bunnei/mem-manager-fixes
memory_manager: Improved implementation of read/write/copy block.
2019-04-07 17:44:59 -04:00
Fernando Sahmkow
021cd56bc9 Permit a Null Shader in case of a bad host_ptr. 2019-04-07 07:52:01 -04:00
Lioncash
36a1e6a982 core/memory: Remove unused enum constants
These are holdovers from Citra and can be removed.
2019-04-07 03:04:55 -04:00
Lioncash
abae7577d2 core/memory: Remove GetCurrentPageTable()
Now that nothing actually touches the internal page table aside from the
memory subsystem itself, we can remove the accessor to it.
2019-04-07 02:47:37 -04:00
Lioncash
a6a82bb004 arm/arm_dynarmic: Remove unnecessary current_page_table member
Given the page table will always be guaranteed to be that of whatever
the current process is, we no longer need to keep this around.
2019-04-07 02:43:51 -04:00
Lioncash
e779686a76 kernel: Handle page table switching within MakeCurrentProcess()
Centralizes the page table switching to one spot, rather than making
calling code deal with it everywhere.
2019-04-07 01:12:54 -04:00
khang06
945e39471d fix clang-format target when using a path with spaces on windows 2019-04-07 02:10:01 +02:00
Lioncash
7a7ffa602d kernel/server_session: Return a std::pair from CreateSessionPair()
Keeps the return type consistent with the function name. While we're at
it, we can also reduce the amount of boilerplate involved with handling
these by using structured bindings.
2019-04-06 01:42:03 -04:00
Lioncash
04d265562f kernel/server_port: Return a std::pair from CreatePortPair()
Returns the same type that the function name describes.
2019-04-06 01:36:53 -04:00
ReinUsesLisp
ddcb711ee8 maxwell_3d: Reduce severity of ProcessSyncPoint 2019-04-06 02:18:20 -03:00
bunnei
864280fabc Merge pull request #2317 from FernandoS27/sync
Implement SyncPoint Register in the GPU.
2019-04-05 23:50:54 -04:00
bunnei
7d1c0fd1ad Merge pull request #2325 from lioncash/name
kernel/server_session: Provide a GetName() override
2019-04-05 23:48:13 -04:00
bunnei
fddafa14c8 Merge pull request #2342 from lioncash/warning
common/multi_level_queue: Silence truncation warnings
2019-04-05 23:47:27 -04:00
bunnei
54c7e8e40e Merge pull request #2240 from FearlessTobi/port-4651
Port citra-emu/citra#4651: "gdbstub: Fix some bugs in IsMemoryBreak() and ServeBreak. Add workaround to let watchpoints break into GDB."
2019-04-05 23:46:37 -04:00
bunnei
e3402d976d Merge pull request #2346 from lioncash/header
video_core/engines: Remove unnecessary inclusions where applicable
2019-04-05 23:44:27 -04:00
bunnei
20be92d5e6 memory_manager: Improved implementation of read/write/copy block.
- Fixes graphical issues with Chocobo's Mystery Dungeon EVERY BUDDY!
- Fixes a crash with Mario Tennis Aces
2019-04-05 23:43:34 -04:00
bunnei
89b8801a97 Merge pull request #2350 from lioncash/vmem
video_core/memory_manager: Mark a few member functions with the const qualifier
2019-04-05 23:40:54 -04:00
bunnei
00207cc965 Merge pull request #2340 from lioncash/view
file_sys/fsmitm_romfsbuild: Utilize a string_view in romfs_calc_path_hash
2019-04-05 23:40:16 -04:00
bunnei
e86b26cd2b Merge pull request #2334 from lioncash/override
core: Add missing override specifiers where applicable
2019-04-05 23:39:52 -04:00
bunnei
41890a84be Merge pull request #2347 from lioncash/trunc
video_core/gpu_thread: Silence truncation warning in ThreadManager's constructor
2019-04-05 23:39:31 -04:00
bunnei
23d3cd7604 Merge pull request #2341 from lioncash/compare
file_sys/nca_metadata: Remove unnecessary comparison operators for TitleType
2019-04-05 23:38:37 -04:00
bunnei
d6cddffb78 Merge pull request #2339 from lioncash/rank
service/fsp_srv: Update SaveDataInfo and SaveDataDescriptor structs
2019-04-05 23:36:46 -04:00
bunnei
520e4e5d4b Merge pull request #2327 from ReinUsesLisp/crash-safe-visit
gl_shader_decompiler: Return early when an operation is invalid
2019-04-05 23:36:18 -04:00
bunnei
b8fbd125e6 Merge pull request #2343 from lioncash/todo
file_sys/program_metadata: Remove obsolete TODOs
2019-04-05 23:35:54 -04:00
bunnei
cb2209d06a Merge pull request #2337 from lioncash/temporary
gl_shader_decompiler: Rename GenerateTemporal() to GenerateTemporary()
2019-04-05 23:35:31 -04:00
bunnei
854ac468b9 Merge pull request #2329 from lioncash/sanitize
kernel/svc: Properly sanitize mutex address in WaitProcessWideKeyAtomic
2019-04-05 23:35:06 -04:00
bunnei
150a3c0890 Merge pull request #2344 from lioncash/result
hle/result: Remove unnecessary bitfield entry for ResultCode
2019-04-05 23:34:42 -04:00
bunnei
d9ee5b874c Merge pull request #2349 from lioncash/surface
yuzu/debugger/graphics/graphics_surface: General cleanup
2019-04-05 23:33:45 -04:00
bunnei
8a1bcc3d30 Merge pull request #2351 from lioncash/macro
video_core/macro_interpreter: Simplify GetRegister()
2019-04-05 23:32:26 -04:00
Lioncash
00e7190e29 video_core/macro_interpreter: Remove assertion within FetchParameter()
We can just use .at(), which essentially does the same thing, but with
less code.
2019-04-05 22:56:58 -04:00
Lioncash
1efdb4897e video_core/macro_interpreter: Simplify GetRegister()
Given we already ensure nothing can set the zeroth register in
SetRegister(), we don't need to check if the index is zero and special
case it. We can just access the register normally, since it's already
going to be zero.

We can also replace the assertion with .at() to perform the equivalent
behavior inline as part of the API.
2019-04-05 22:55:13 -04:00
Lioncash
c13fbe6a41 video_core/memory_manager: Make Read() a const qualified member function
Given this doesn't actually alter internal state, this can be made a
const member function.
2019-04-05 20:30:48 -04:00
Lioncash
76ef6e5c2b video_core/memory_manager: Make ReadBlock() a const qualifier member function
Now, since we have a const qualified variant of GetPointer(), we can put
it to use in ReadBlock() to retrieve the source pointer that is passed
into memcpy.

Now block reading may be done from a const context.
2019-04-05 20:28:44 -04:00
Lioncash
34510bcda8 video_core/memory_manager: Add a const qualified variant of GetPointer()
Allows retrieving read-only pointers from a const context externally.
2019-04-05 20:25:28 -04:00
Lioncash
085b388a7a video_core/memory_manager: Make FindFreeRegion() a const member function
This doesn't modify internal state, so it can be made a const member
function.
2019-04-05 20:22:55 -04:00
Lioncash
9dec087fca video_core/memory_manager: Make GpuToCpuAddress() a const member function
This doesn't modify any internal state, so it can be made a const member
function to allow its use in const contexts.
2019-04-05 20:18:29 -04:00
Lioncash
c0e320ad0d yuzu/debugger/graphics_surface: Display error messages for file I/O errors 2019-04-05 19:54:53 -04:00
Lioncash
845607481c yuzu/debugger/graphics_surface: Tidy up SaveSurface
- Use QStringLiteral where applicable.

- Use const where applicable

- Remove unnecessary precondition check (we already assert the pixbuf
  being non null)
2019-04-05 19:33:00 -04:00
Lioncash
bbeb859122 yuzu/debugger/graphics_surface: Clean up connection overload deduction
We can utilize qOverload with the signal connections to make the
function deducing a little less ugly.
2019-04-05 19:26:43 -04:00
Fernando Sahmkow
fc91e21206 Implement SyncPoint Register in the GPU. 2019-04-05 19:19:30 -04:00
Lioncash
9d296f8a35 yuzu/debugger/graphics_surface: Fill in missing surface format listings
Fills in the missing surface types that were marked as unknown. The
order corresponds with the TextureFormat enum within
video_core/texture.h.

We also don't need to all of these strings as translatable (only the
first string, as it's an English word).
2019-04-05 19:09:56 -04:00
Lioncash
30ce9b2b5c video_core/gpu_thread: Silence truncation warning in ThreadManager's constructor
Since c5d41fd812 callback parameters were
changed to use an s64 to represent late cycles instead of an int, so
this was causing a truncation warning to occur here. Changing it to s64
is sufficient to silence the warning.
2019-04-05 18:37:37 -04:00
Lioncash
22f02076c6 video_core/engines: Make memory manager members private
These aren't used externally by anything, so they can be made private
data members.
2019-04-05 18:26:43 -04:00
Lioncash
26223f8124 video_core/engines: Remove unnecessary inclusions where applicable
Replaces header inclusions with forward declarations where applicable
and also removes unused headers within the cpp file. This reduces a few
more dependencies on core/memory.h
2019-04-05 18:26:32 -04:00
Lioncash
5dfcf7cf26 hle/result: Remove unnecessary bitfield entry for ResultCode
This is a hold over from the 3DS error codes in Citra.
2019-04-05 16:34:34 -04:00
Lioncash
37b23efece file_sys/program_metadata: Remove obsolete TODOs
BitField has been trivially copyable since
e99a148628, so we can eliminate these
TODO comments and use ReadObject() directly instead of memcpying the
data.
2019-04-05 15:57:47 -04:00
Lioncash
93b84e9308 common/multi_level_queue: Silence truncation warning in iterator operator++ 2019-04-05 15:35:46 -04:00
Lioncash
33db37e669 common/bit_util: Make CountLeading/CountTrailing functions have the same return types
Makes the return type consistently uniform (like the intrinsics we're
wrapping). This also conveniently silences a truncation warning within
the kernel multi_level_queue.
2019-04-05 15:29:40 -04:00
Lioncash
a1868286b0 file_sys/nca_metadata: Remove unnecessary comparison operators for TitleType
enum class elements from the same enum can already be compared against
one another without the need for explicitly defined comparison
operators.
2019-04-05 15:20:07 -04:00
Lioncash
28e36de56f file_sys/fsmitm_romfsbuild: Utilize a string_view in romfs_calc_path_hash()
The given string instance doesn't need to be copied entirely, we can
just use a view instead.
2019-04-05 15:12:20 -04:00
Lioncash
c05c8a7a06 service/fsp_srv: Don't pass SaveDataDescriptor instances by value.
Passing around a 64 byte data struct by value is kind of wasteful,
instead pass a reference to the struct.
2019-04-05 11:04:01 -04:00
Lioncash
d0ed3ff4b7 service/fsp_srv: Remove unnecessary unknown member in OpenSaveDataFileSystem
The unknown member here is actually padding due to being passed as a
struct. We can do the same, and remove the need to pop a padding word.
2019-04-05 11:04:01 -04:00
Lioncash
d9ee58a3b5 service/fsp_srv: Update SaveDataInfo and SaveDataDescriptor structs
I realized that I updated the documentation on SwitchBrew a while ago,
but never actually updated the structs within yuzu.
2019-04-05 11:03:53 -04:00
bunnei
d6b7195192 Merge pull request #2338 from lioncash/fs
filesystem: Use a std::string_view in OpenFile()
2019-04-05 10:52:54 -04:00
bunnei
66be5150d6 Merge pull request #2282 from bunnei/gpu-asynch-v2
gpu_thread: Improve synchronization by using CoreTiming.
2019-04-04 22:38:04 -04:00
bunnei
7c1af3aa10 Merge pull request #2292 from lioncash/nacp
file_sys/control_metadata: Amend naming of members
2019-04-04 22:37:42 -04:00
bunnei
e6a9459b04 Merge pull request #2335 from lioncash/service-unused
hle/service: Resolve unused variable warnings
2019-04-04 22:36:57 -04:00
bunnei
f7d6e08688 Merge pull request #2336 from ReinUsesLisp/txq
gl_shader_decompiler: Fix TXQ types
2019-04-04 22:36:19 -04:00
bunnei
9959c95966 Merge pull request #2331 from lioncash/cache
yuzu/main: Minor adjustments to OnTransferableShaderCacheOpenFile()
2019-04-04 22:35:53 -04:00
bunnei
8502cda17a Merge pull request #2333 from lioncash/video-include
video_core/renderer_opengl: Remove unnecessary includes
2019-04-04 22:35:24 -04:00
bunnei
09789c3ffc Merge pull request #2332 from lioncash/include
yuzu/main: Remove unnecessary includes
2019-04-04 22:34:53 -04:00
Lioncash
15e0c4c4ec filesystem: Use a std::string_view in OpenFile()
Rather than make a full copy of the path, we can just use a string view
and truncate the viewed portion of the string instead of creating a totally
new truncated string.
2019-04-04 20:59:00 -04:00
Lioncash
52746ed8dc gl_shader_decompiler: Rename GenerateTemporal() to GenerateTemporary()
Temporal generally indicates a relation to time, but this is just
creating a temporary, so this isn't really an accurate name for what the
function is actually doing.
2019-04-04 19:35:04 -04:00
ReinUsesLisp
88a3c05b7b gl_shader_decompiler: Fix TXQ types
TXQ returns integer types. Shaders usually do:

R0 = TXQ(); // => int
R0 = static_cast<float>(R0);

If we don't treat it as an integer, it will cast a binary float value as
float - resulting in a corrupted number.
2019-04-04 20:07:11 -03:00
Lioncash
7f506be2ee hle/service: Resolve unused variable warnings
In several places, we have request parsers where there's nothing to
really parse, simply because the HLE function in question operates on
buffers. In these cases we can just remove these instances altogether.

In the other cases, we can retrieve the relevant members from the parser
and at least log them out, giving them some use.
2019-04-04 13:18:09 -04:00
Lioncash
5b0a9f8ba8 core: Add missing override specifiers where applicable
Applies the override specifier where applicable. In the case of
destructors that are  defaulted in their definition, they can
simply be removed.

This also removes the unnecessary inclusions being done in audin_u and
audrec_u, given their close proximity.
2019-04-04 12:19:44 -04:00
Lioncash
3fd5998d84 video_core/renderer_opengl: Remove unnecessary includes
Quite a few unused includes have built up over time, particularly on
core/memory.h. Removing these includes means the source files including
those files will no longer need to be rebuilt if they're changed, making
compilation slightly faster in this scenario.
2019-04-04 12:00:46 -04:00
Lioncash
e8f3d85ea5 yuzu/main: Remove unnecessary includes
While we're at it, don't use <QtGui> and <QtWidgets> and instead include
exactly which headers we actually need.
2019-04-04 11:29:19 -04:00
Lioncash
a973271b8c file_sys/control_metadata: Amend naming of members
Quite a bit of these were out of sync with Switchbrew (and in some cases
entirely wrong). While we're at it, also expand the section of named
members. A segment within the control metadata is used to specify
maximum values for the user, device, and cache storage max sizes and
journal sizes.

These appear to be generally used by the am service (e.g. in
CreateCacheStorage, etc).
2019-04-03 21:18:12 -04:00
Lioncash
c39c8e6982 kernel/svc: Properly sanitize mutex address in WaitProcessWideKeyAtomic
We need to be checking whether or not the given address is within the
kernel address space or if the given address isn't word-aligned and bail
in these scenarios instead of trashing any kernel state.
2019-04-03 20:25:41 -04:00
ReinUsesLisp
78bd66d037 gl_state: Rework to enable individual applies 2019-04-03 20:26:27 -03:00
ReinUsesLisp
06c1f75f21 gl_shader_decompiler: Return early when an operation is invalid 2019-04-03 16:02:09 -03:00
Lioncash
a074363a5d kernel/server_session: Provide a GetName() override
Given server sessions can be given a name, we should allow retrieving
it instead of using the default implementation of GetName(), which would
just return "[UNKNOWN KERNEL OBJECT]".
2019-04-03 10:39:06 -04:00
bunnei
4555b63750 gpu_thread: Improve synchronization by using CoreTiming. 2019-04-01 21:32:39 -04:00
ReinUsesLisp
38658b38b4 gl_shader_decompiler: Hide local definitions inside an anonymous namespace 2019-03-31 00:26:34 -03:00
Mat M
da02946f4f shader_ir/decode: Silent implicit sign conversion warning
Co-Authored-By: ReinUsesLisp <reinuseslisp@airmail.cc>
2019-03-31 00:12:54 -03:00
ReinUsesLisp
e8abe4b77c gl_shader_decompiler: Add AOFFI backing implementation 2019-03-30 02:55:18 -03:00
ReinUsesLisp
cb68ce7c2f shader_ir/decode: Implement AOFFI for TEX and TLD4 2019-03-30 02:53:29 -03:00
ReinUsesLisp
cf4ecc1945 shader_ir: Implement immediate register tracking 2019-03-30 02:53:16 -03:00
unknown
eadc834bb3 gitmodules: Replace taps with spaces 2019-03-29 18:22:08 +01:00
unknown
b4857e326f common/zstd_compression: simplify decompression interface 2019-03-29 18:22:08 +01:00
unknown
aa92da205e gl_shader_disk_cache: Fixup clang format 2019-03-29 18:22:08 +01:00
unknown
35ebbbc167 gl_shader_disk_cache: Use Zstandard for compression 2019-03-29 18:22:08 +01:00
unknown
72477731ed common/zstd_compression: Add Zstandard wrapper 2019-03-29 18:22:08 +01:00
unknown
ca82589350 common: Link libzstd_static 2019-03-29 18:22:07 +01:00
unknown
d85c1141b9 externals: Add libzstd_static to externals CMakeLists.txt 2019-03-29 18:22:07 +01:00
unknown
93de7a7b40 externals: Add Zstandard v1.3.8 2019-03-29 18:22:07 +01:00
unknown
a05f94dcc8 Addressed feedback 2019-03-29 18:22:07 +01:00
unknown
cec7da37b9 core: Do not link LZ4 to core. Use common/data_compression for nso segment decompression instead. 2019-03-29 18:20:48 +01:00
Dimitri A
0e7ad1c367 gdbstub: Fix some bugs in IsMemoryBreak() and ServeBreak. Add workaround to let watchpoints break into GDB. (#4651)
* gdbstub: fix IsMemoryBreak() returning false while connected to client

As a result, the only existing codepath for a memory watchpoint hit to break into GDB (InterpeterMainLoop, GDB_BP_CHECK, ARMul_State::RecordBreak) is finally taken,
which exposes incorrect logic* in both RecordBreak and ServeBreak.

* a blank BreakpointAddress structure is passed, which sets r15 (PC) to NULL

* gdbstub: DynCom: default-initialize two members/vars used in conditionals

* gdbstub: DynCom: don't record memory watchpoint hits via RecordBreak()

For now, instead check for GDBStub::IsMemoryBreak() in InterpreterMainLoop and ServeBreak.

Fixes PC being set to a stale/unhit breakpoint address (often zero) when a memory watchpoint (rwatch, watch, awatch) is handled in ServeBreak() and generates a GDB trap.

Reasons for removing a call to RecordBreak() for memory watchpoints:
* The``breakpoint_data`` we pass is typed Execute or None. It describes the predicted next code breakpoint hit relative to PC;

* GDBStub::IsMemoryBreak() returns true if a recent Read/Write operation hit a watchpoint. It doesn't specify which in return, nor does it trace it anywhere. Thus, the only data we could give RecordBreak() is a placeholder BreakpointAddress at offset NULL and type Access. I found the idea silly, compared to simply relying on GDBStub::IsMemoryBreak().

There is currently no measure in the code that remembers the addresses (and types) of any watchpoints that were hit by an instruction, in order to send them to GDB as "extended stop information."
I'm considering an implementation for this.

* gdbstub: Change an ASSERT to DEBUG_ASSERT

I have never seen the (Reg[15] == last_bkpt.address) assert fail in practice, even after several weeks of (locally) developping various branches around GDB.  Only leave it inside Debug builds.
2019-03-15 16:31:06 +01:00
102 changed files with 1397 additions and 965 deletions

3
.gitmodules vendored
View File

@@ -40,3 +40,6 @@
[submodule "Vulkan-Headers"]
path = externals/Vulkan-Headers
url = https://github.com/KhronosGroup/Vulkan-Headers.git
[submodule "externals/zstd"]
path = externals/zstd
url = https://github.com/facebook/zstd

View File

@@ -309,7 +309,7 @@ if (CLANG_FORMAT)
set(CCOMMENT "Running clang format against all the .h and .cpp files in src/")
if (WIN32)
add_custom_target(clang-format
COMMAND powershell.exe -Command "Get-ChildItem ${SRCS}/* -Include *.cpp,*.h -Recurse | Foreach {${CLANG_FORMAT} -i $_.fullname}"
COMMAND powershell.exe -Command "Get-ChildItem '${SRCS}/*' -Include *.cpp,*.h -Recurse | Foreach {&'${CLANG_FORMAT}' -i $_.fullname}"
COMMENT ${CCOMMENT})
elseif(MINGW)
add_custom_target(clang-format

View File

@@ -49,6 +49,10 @@ add_subdirectory(open_source_archives EXCLUDE_FROM_ALL)
add_library(unicorn-headers INTERFACE)
target_include_directories(unicorn-headers INTERFACE ./unicorn/include)
# Zstandard
add_subdirectory(zstd/build/cmake EXCLUDE_FROM_ALL)
target_include_directories(libzstd_static INTERFACE ./zstd/lib)
# SoundTouch
add_subdirectory(soundtouch)

1
externals/zstd vendored Submodule

Submodule externals/zstd added at 470344d33e

View File

@@ -125,6 +125,8 @@ add_library(common STATIC
uint128.h
vector_math.h
web_result.h
zstd_compression.cpp
zstd_compression.h
)
if(ARCHITECTURE_x86_64)
@@ -138,4 +140,4 @@ endif()
create_target_directory_groups(common)
target_link_libraries(common PUBLIC Boost::boost fmt microprofile)
target_link_libraries(common PRIVATE lz4_static)
target_link_libraries(common PRIVATE lz4_static libzstd_static)

View File

@@ -57,3 +57,21 @@ __declspec(noinline, noreturn)
#define UNIMPLEMENTED_IF(cond) ASSERT_MSG(!(cond), "Unimplemented code!")
#define UNIMPLEMENTED_IF_MSG(cond, ...) ASSERT_MSG(!(cond), __VA_ARGS__)
// If the assert is ignored, execute _b_
#define ASSERT_OR_EXECUTE(_a_, _b_) \
do { \
ASSERT(_a_); \
if (!(_a_)) { \
_b_ \
} \
} while (0)
// If the assert is ignored, execute _b_
#define ASSERT_OR_EXECUTE_MSG(_a_, _b_, ...) \
do { \
ASSERT_MSG(_a_, __VA_ARGS__); \
if (!(_a_)) { \
_b_ \
} \
} while (0)

View File

@@ -32,7 +32,7 @@ inline u32 CountLeadingZeroes32(u32 value) {
return 32;
}
inline u64 CountLeadingZeroes64(u64 value) {
inline u32 CountLeadingZeroes64(u64 value) {
unsigned long leading_zero = 0;
if (_BitScanReverse64(&leading_zero, value) != 0) {
@@ -47,15 +47,15 @@ inline u32 CountLeadingZeroes32(u32 value) {
return 32;
}
return __builtin_clz(value);
return static_cast<u32>(__builtin_clz(value));
}
inline u64 CountLeadingZeroes64(u64 value) {
inline u32 CountLeadingZeroes64(u64 value) {
if (value == 0) {
return 64;
}
return __builtin_clzll(value);
return static_cast<u32>(__builtin_clzll(value));
}
#endif
@@ -70,7 +70,7 @@ inline u32 CountTrailingZeroes32(u32 value) {
return 32;
}
inline u64 CountTrailingZeroes64(u64 value) {
inline u32 CountTrailingZeroes64(u64 value) {
unsigned long trailing_zero = 0;
if (_BitScanForward64(&trailing_zero, value) != 0) {
@@ -85,15 +85,15 @@ inline u32 CountTrailingZeroes32(u32 value) {
return 32;
}
return __builtin_ctz(value);
return static_cast<u32>(__builtin_ctz(value));
}
inline u64 CountTrailingZeroes64(u64 value) {
inline u32 CountTrailingZeroes64(u64 value) {
if (value == 0) {
return 64;
}
return __builtin_ctzll(value);
return static_cast<u32>(__builtin_ctzll(value));
}
#endif

View File

@@ -72,7 +72,7 @@ public:
u64 prios = mlq.used_priorities;
prios &= ~((1ULL << (current_priority + 1)) - 1);
if (prios == 0) {
current_priority = mlq.depth();
current_priority = static_cast<u32>(mlq.depth());
} else {
current_priority = CountTrailingZeroes64(prios);
it = GetBeginItForPrio();

View File

@@ -0,0 +1,53 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <zstd.h>
#include "common/assert.h"
#include "common/zstd_compression.h"
namespace Common::Compression {
std::vector<u8> CompressDataZSTD(const u8* source, std::size_t source_size, s32 compression_level) {
compression_level = std::clamp(compression_level, 1, ZSTD_maxCLevel());
const std::size_t max_compressed_size = ZSTD_compressBound(source_size);
std::vector<u8> compressed(max_compressed_size);
const std::size_t compressed_size =
ZSTD_compress(compressed.data(), compressed.size(), source, source_size, compression_level);
if (ZSTD_isError(compressed_size)) {
// Compression failed
return {};
}
compressed.resize(compressed_size);
return compressed;
}
std::vector<u8> CompressDataZSTDDefault(const u8* source, std::size_t source_size) {
return CompressDataZSTD(source, source_size, ZSTD_CLEVEL_DEFAULT);
}
std::vector<u8> DecompressDataZSTD(const std::vector<u8>& compressed) {
const std::size_t decompressed_size =
ZSTD_getDecompressedSize(compressed.data(), compressed.size());
std::vector<u8> decompressed(decompressed_size);
const std::size_t uncompressed_result_size = ZSTD_decompress(
decompressed.data(), decompressed.size(), compressed.data(), compressed.size());
if (decompressed_size != uncompressed_result_size || ZSTD_isError(uncompressed_result_size)) {
// Decompression failed
return {};
}
return decompressed;
}
} // namespace Common::Compression

View File

@@ -0,0 +1,42 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <vector>
#include "common/common_types.h"
namespace Common::Compression {
/**
* Compresses a source memory region with Zstandard and returns the compressed data in a vector.
*
* @param source the uncompressed source memory region.
* @param source_size the size in bytes of the uncompressed source memory region.
* @param compression_level the used compression level. Should be between 1 and 22.
*
* @return the compressed data.
*/
std::vector<u8> CompressDataZSTD(const u8* source, std::size_t source_size, s32 compression_level);
/**
* Compresses a source memory region with Zstandard with the default compression level and returns
* the compressed data in a vector.
*
* @param source the uncompressed source memory region.
* @param source_size the size in bytes of the uncompressed source memory region.
*
* @return the compressed data.
*/
std::vector<u8> CompressDataZSTDDefault(const u8* source, std::size_t source_size);
/**
* Decompresses a source memory region with Zstandard and returns the uncompressed data in a vector.
*
* @param compressed the compressed source memory region.
*
* @return the decompressed data.
*/
std::vector<u8> DecompressDataZSTD(const std::vector<u8>& compressed);
} // namespace Common::Compression

View File

@@ -26,7 +26,6 @@ using Vector = Dynarmic::A64::Vector;
class ARM_Dynarmic_Callbacks : public Dynarmic::A64::UserCallbacks {
public:
explicit ARM_Dynarmic_Callbacks(ARM_Dynarmic& parent) : parent(parent) {}
~ARM_Dynarmic_Callbacks() = default;
u8 MemoryRead8(u64 vaddr) override {
return Memory::Read8(vaddr);
@@ -164,7 +163,6 @@ MICROPROFILE_DEFINE(ARM_Jit_Dynarmic, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64)
void ARM_Dynarmic::Run() {
MICROPROFILE_SCOPE(ARM_Jit_Dynarmic);
ASSERT(Memory::GetCurrentPageTable() == current_page_table);
jit->Run();
}
@@ -279,7 +277,6 @@ void ARM_Dynarmic::ClearExclusiveState() {
void ARM_Dynarmic::PageTableChanged() {
jit = MakeJit();
current_page_table = Memory::GetCurrentPageTable();
}
DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(std::size_t core_count) : monitor(core_count) {}

View File

@@ -12,10 +12,6 @@
#include "core/arm/exclusive_monitor.h"
#include "core/arm/unicorn/arm_unicorn.h"
namespace Common {
struct PageTable;
}
namespace Core::Timing {
class CoreTiming;
}
@@ -29,7 +25,7 @@ class ARM_Dynarmic final : public ARM_Interface {
public:
ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
std::size_t core_index);
~ARM_Dynarmic();
~ARM_Dynarmic() override;
void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
Kernel::VMAPermission perms) override;
@@ -69,14 +65,12 @@ private:
std::size_t core_index;
Timing::CoreTiming& core_timing;
DynarmicExclusiveMonitor& exclusive_monitor;
Common::PageTable* current_page_table = nullptr;
};
class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
public:
explicit DynarmicExclusiveMonitor(std::size_t core_count);
~DynarmicExclusiveMonitor();
~DynarmicExclusiveMonitor() override;
void SetExclusive(std::size_t core_index, VAddr addr) override;
void ClearExclusive() override;

View File

@@ -192,12 +192,13 @@ void ARM_Unicorn::ExecuteInstructions(int num_instructions) {
CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
core_timing.AddTicks(num_instructions);
if (GDBStub::IsServerEnabled()) {
if (last_bkpt_hit) {
if (last_bkpt_hit && last_bkpt.type == GDBStub::BreakpointType::Execute) {
uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);
}
Kernel::Thread* thread = Kernel::GetCurrentThread();
SaveContext(thread->GetContext());
if (last_bkpt_hit || GDBStub::GetCpuStepFlag()) {
if (last_bkpt_hit || GDBStub::IsMemoryBreak() || GDBStub::GetCpuStepFlag()) {
last_bkpt_hit = false;
GDBStub::Break();
GDBStub::SendTrap(thread, 5);

View File

@@ -18,7 +18,7 @@ namespace Core {
class ARM_Unicorn final : public ARM_Interface {
public:
explicit ARM_Unicorn(Timing::CoreTiming& core_timing);
~ARM_Unicorn();
~ARM_Unicorn() override;
void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
Kernel::VMAPermission perms) override;
@@ -50,7 +50,7 @@ private:
uc_engine* uc{};
Timing::CoreTiming& core_timing;
GDBStub::BreakpointAddress last_bkpt{};
bool last_bkpt_hit;
bool last_bkpt_hit = false;
};
} // namespace Core

View File

@@ -67,7 +67,7 @@ std::string NACP::GetDeveloperName(Language language) const {
}
u64 NACP::GetTitleId() const {
return raw.title_id;
return raw.save_data_owner_id;
}
u64 NACP::GetDLCBaseTitleId() const {
@@ -80,11 +80,11 @@ std::string NACP::GetVersionString() const {
}
u64 NACP::GetDefaultNormalSaveSize() const {
return raw.normal_save_data_size;
return raw.user_account_save_data_size;
}
u64 NACP::GetDefaultJournalSaveSize() const {
return raw.journal_sava_data_size;
return raw.user_account_save_data_journal_size;
}
std::vector<u8> NACP::GetRawBytes() const {

View File

@@ -38,23 +38,35 @@ struct RawNACP {
u8 video_capture_mode;
bool data_loss_confirmation;
INSERT_PADDING_BYTES(1);
u64_le title_id;
u64_le presence_group_id;
std::array<u8, 0x20> rating_age;
std::array<char, 0x10> version_string;
u64_le dlc_base_title_id;
u64_le title_id_2;
u64_le normal_save_data_size;
u64_le journal_sava_data_size;
INSERT_PADDING_BYTES(0x18);
u64_le product_code;
u64_le save_data_owner_id;
u64_le user_account_save_data_size;
u64_le user_account_save_data_journal_size;
u64_le device_save_data_size;
u64_le device_save_data_journal_size;
u64_le bcat_delivery_cache_storage_size;
char application_error_code_category[8];
std::array<u64_le, 0x8> local_communication;
u8 logo_type;
u8 logo_handling;
bool runtime_add_on_content_install;
INSERT_PADDING_BYTES(5);
u64_le title_id_update;
std::array<u8, 0x40> bcat_passphrase;
INSERT_PADDING_BYTES(0xEC0);
u64_le seed_for_pseudo_device_id;
std::array<u8, 0x41> bcat_passphrase;
INSERT_PADDING_BYTES(7);
u64_le user_account_save_data_max_size;
u64_le user_account_save_data_max_journal_size;
u64_le device_save_data_max_size;
u64_le device_save_data_max_journal_size;
u64_le temporary_storage_size;
u64_le cache_storage_size;
u64_le cache_storage_journal_size;
u64_le cache_storage_data_and_journal_max_size;
u64_le cache_storage_max_index;
INSERT_PADDING_BYTES(0xE70);
};
static_assert(sizeof(RawNACP) == 0x4000, "RawNACP has incorrect size.");

View File

@@ -23,6 +23,7 @@
*/
#include <cstring>
#include <string_view>
#include "common/alignment.h"
#include "common/assert.h"
#include "core/file_sys/fsmitm_romfsbuild.h"
@@ -97,7 +98,8 @@ struct RomFSBuildFileContext {
VirtualFile source;
};
static u32 romfs_calc_path_hash(u32 parent, std::string path, u32 start, std::size_t path_len) {
static u32 romfs_calc_path_hash(u32 parent, std::string_view path, u32 start,
std::size_t path_len) {
u32 hash = parent ^ 123456789;
for (u32 i = 0; i < path_len; i++) {
hash = (hash >> 5) | (hash << 27);

View File

@@ -10,14 +10,6 @@
namespace FileSys {
bool operator>=(TitleType lhs, TitleType rhs) {
return static_cast<std::size_t>(lhs) >= static_cast<std::size_t>(rhs);
}
bool operator<=(TitleType lhs, TitleType rhs) {
return static_cast<std::size_t>(lhs) <= static_cast<std::size_t>(rhs);
}
CNMT::CNMT(VirtualFile file) {
if (file->ReadObject(&header) != sizeof(CNMTHeader))
return;

View File

@@ -29,9 +29,6 @@ enum class TitleType : u8 {
DeltaTitle = 0x83,
};
bool operator>=(TitleType lhs, TitleType rhs);
bool operator<=(TitleType lhs, TitleType rhs);
enum class ContentRecordType : u8 {
Meta = 0,
Program = 1,

View File

@@ -3,7 +3,6 @@
// Refer to the license.txt file included.
#include <cstddef>
#include <cstring>
#include <vector>
#include "common/logging/log.h"
@@ -17,28 +16,30 @@ ProgramMetadata::ProgramMetadata() = default;
ProgramMetadata::~ProgramMetadata() = default;
Loader::ResultStatus ProgramMetadata::Load(VirtualFile file) {
std::size_t total_size = static_cast<std::size_t>(file->GetSize());
if (total_size < sizeof(Header))
const std::size_t total_size = file->GetSize();
if (total_size < sizeof(Header)) {
return Loader::ResultStatus::ErrorBadNPDMHeader;
}
// TODO(DarkLordZach): Use ReadObject when Header/AcidHeader becomes trivially copyable.
std::vector<u8> npdm_header_data = file->ReadBytes(sizeof(Header));
if (sizeof(Header) != npdm_header_data.size())
if (sizeof(Header) != file->ReadObject(&npdm_header)) {
return Loader::ResultStatus::ErrorBadNPDMHeader;
std::memcpy(&npdm_header, npdm_header_data.data(), sizeof(Header));
}
std::vector<u8> acid_header_data = file->ReadBytes(sizeof(AcidHeader), npdm_header.acid_offset);
if (sizeof(AcidHeader) != acid_header_data.size())
if (sizeof(AcidHeader) != file->ReadObject(&acid_header, npdm_header.acid_offset)) {
return Loader::ResultStatus::ErrorBadACIDHeader;
std::memcpy(&acid_header, acid_header_data.data(), sizeof(AcidHeader));
}
if (sizeof(AciHeader) != file->ReadObject(&aci_header, npdm_header.aci_offset))
if (sizeof(AciHeader) != file->ReadObject(&aci_header, npdm_header.aci_offset)) {
return Loader::ResultStatus::ErrorBadACIHeader;
}
if (sizeof(FileAccessControl) != file->ReadObject(&acid_file_access, acid_header.fac_offset))
if (sizeof(FileAccessControl) != file->ReadObject(&acid_file_access, acid_header.fac_offset)) {
return Loader::ResultStatus::ErrorBadFileAccessControl;
if (sizeof(FileAccessHeader) != file->ReadObject(&aci_file_access, aci_header.fah_offset))
}
if (sizeof(FileAccessHeader) != file->ReadObject(&aci_file_access, aci_header.fah_offset)) {
return Loader::ResultStatus::ErrorBadFileAccessHeader;
}
aci_kernel_capabilities.resize(aci_header.kac_size / sizeof(u32));
const u64 read_size = aci_header.kac_size;

View File

@@ -58,7 +58,6 @@ public:
void Print() const;
private:
// TODO(DarkLordZach): BitField is not trivially copyable.
struct Header {
std::array<char, 4> magic;
std::array<u8, 8> reserved;
@@ -85,7 +84,6 @@ private:
static_assert(sizeof(Header) == 0x80, "NPDM header structure size is wrong");
// TODO(DarkLordZach): BitField is not trivially copyable.
struct AcidHeader {
std::array<u8, 0x100> signature;
std::array<u8, 0x100> nca_modulus;

View File

@@ -16,8 +16,10 @@ namespace FileSys {
constexpr char SAVE_DATA_SIZE_FILENAME[] = ".yuzu_save_size";
std::string SaveDataDescriptor::DebugInfo() const {
return fmt::format("[type={:02X}, title_id={:016X}, user_id={:016X}{:016X}, save_id={:016X}]",
static_cast<u8>(type), title_id, user_id[1], user_id[0], save_id);
return fmt::format("[type={:02X}, title_id={:016X}, user_id={:016X}{:016X}, save_id={:016X}, "
"rank={}, index={}]",
static_cast<u8>(type), title_id, user_id[1], user_id[0], save_id,
static_cast<u8>(rank), index);
}
SaveDataFactory::SaveDataFactory(VirtualDir save_directory) : dir(std::move(save_directory)) {
@@ -28,7 +30,7 @@ SaveDataFactory::SaveDataFactory(VirtualDir save_directory) : dir(std::move(save
SaveDataFactory::~SaveDataFactory() = default;
ResultVal<VirtualDir> SaveDataFactory::Open(SaveDataSpaceId space, SaveDataDescriptor meta) {
ResultVal<VirtualDir> SaveDataFactory::Open(SaveDataSpaceId space, const SaveDataDescriptor& meta) {
if (meta.type == SaveDataType::SystemSaveData || meta.type == SaveDataType::SaveData) {
if (meta.zero_1 != 0) {
LOG_WARNING(Service_FS,

View File

@@ -32,12 +32,19 @@ enum class SaveDataType : u8 {
CacheStorage = 5,
};
enum class SaveDataRank : u8 {
Primary,
Secondary,
};
struct SaveDataDescriptor {
u64_le title_id;
u128 user_id;
u64_le save_id;
SaveDataType type;
INSERT_PADDING_BYTES(7);
SaveDataRank rank;
u16_le index;
INSERT_PADDING_BYTES(4);
u64_le zero_1;
u64_le zero_2;
u64_le zero_3;
@@ -57,7 +64,7 @@ public:
explicit SaveDataFactory(VirtualDir dir);
~SaveDataFactory();
ResultVal<VirtualDir> Open(SaveDataSpaceId space, SaveDataDescriptor meta);
ResultVal<VirtualDir> Open(SaveDataSpaceId space, const SaveDataDescriptor& meta);
VirtualDir GetSaveDataSpaceDirectory(SaveDataSpaceId space) const;

View File

@@ -1030,7 +1030,7 @@ static void Step() {
/// Tell the CPU if we hit a memory breakpoint.
bool IsMemoryBreak() {
if (IsConnected()) {
if (!IsConnected()) {
return false;
}

View File

@@ -139,10 +139,8 @@ public:
context->AddDomainObject(std::move(iface));
} else {
auto& kernel = Core::System::GetInstance().Kernel();
auto sessions =
auto [server, client] =
Kernel::ServerSession::CreateSessionPair(kernel, iface->GetServiceName());
auto server = std::get<Kernel::SharedPtr<Kernel::ServerSession>>(sessions);
auto client = std::get<Kernel::SharedPtr<Kernel::ClientSession>>(sessions);
iface->ClientConnected(server);
context->AddMoveObject(std::move(client));
}

View File

@@ -2,8 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <tuple>
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/client_session.h"
#include "core/hle/kernel/errors.h"
@@ -31,18 +29,18 @@ ResultVal<SharedPtr<ClientSession>> ClientPort::Connect() {
active_sessions++;
// Create a new session pair, let the created sessions inherit the parent port's HLE handler.
auto sessions = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this);
auto [server, client] = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this);
if (server_port->HasHLEHandler()) {
server_port->GetHLEHandler()->ClientConnected(std::get<SharedPtr<ServerSession>>(sessions));
server_port->GetHLEHandler()->ClientConnected(server);
} else {
server_port->AppendPendingSession(std::get<SharedPtr<ServerSession>>(sessions));
server_port->AppendPendingSession(server);
}
// Wake the threads waiting on the ServerPort
server_port->WakeupAllWaitingThreads();
return MakeResult(std::get<SharedPtr<ClientSession>>(sessions));
return MakeResult(client);
}
void ClientPort::ConnectionClosed() {

View File

@@ -21,6 +21,7 @@
#include "core/hle/kernel/thread.h"
#include "core/hle/lock.h"
#include "core/hle/result.h"
#include "core/memory.h"
namespace Kernel {
@@ -181,6 +182,7 @@ void KernelCore::AppendNewProcess(SharedPtr<Process> process) {
void KernelCore::MakeCurrentProcess(Process* process) {
impl->current_process = process;
Memory::SetCurrentPageTable(&process->VMManager().page_table);
}
Process* KernelCore::CurrentProcess() {

View File

@@ -32,9 +32,6 @@ namespace {
* @param priority The priority to give the main thread
*/
void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) {
// Setup page table so we can write to memory
Memory::SetCurrentPageTable(&owner_process.VMManager().page_table);
// Initialize new "main" thread
const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress();
auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0,

View File

@@ -101,7 +101,6 @@ void Scheduler::SwitchContext(Thread* new_thread) {
auto* const thread_owner_process = current_thread->GetOwnerProcess();
if (previous_process != thread_owner_process) {
system.Kernel().MakeCurrentProcess(thread_owner_process);
Memory::SetCurrentPageTable(&thread_owner_process->VMManager().page_table);
}
cpu_core.LoadContext(new_thread->GetContext());

View File

@@ -39,9 +39,8 @@ void ServerPort::Acquire(Thread* thread) {
ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
}
std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> ServerPort::CreatePortPair(
KernelCore& kernel, u32 max_sessions, std::string name) {
ServerPort::PortPair ServerPort::CreatePortPair(KernelCore& kernel, u32 max_sessions,
std::string name) {
SharedPtr<ServerPort> server_port(new ServerPort(kernel));
SharedPtr<ClientPort> client_port(new ClientPort(kernel));
@@ -51,7 +50,7 @@ std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> ServerPort::CreatePortP
client_port->max_sessions = max_sessions;
client_port->active_sessions = 0;
return std::make_tuple(std::move(server_port), std::move(client_port));
return std::make_pair(std::move(server_port), std::move(client_port));
}
} // namespace Kernel

View File

@@ -6,7 +6,7 @@
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "core/hle/kernel/object.h"
@@ -23,6 +23,7 @@ class SessionRequestHandler;
class ServerPort final : public WaitObject {
public:
using HLEHandler = std::shared_ptr<SessionRequestHandler>;
using PortPair = std::pair<SharedPtr<ServerPort>, SharedPtr<ClientPort>>;
/**
* Creates a pair of ServerPort and an associated ClientPort.
@@ -32,8 +33,8 @@ public:
* @param name Optional name of the ports
* @return The created port tuple
*/
static std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> CreatePortPair(
KernelCore& kernel, u32 max_sessions, std::string name = "UnknownPort");
static PortPair CreatePortPair(KernelCore& kernel, u32 max_sessions,
std::string name = "UnknownPort");
std::string GetTypeName() const override {
return "ServerPort";

View File

@@ -204,6 +204,6 @@ ServerSession::SessionPair ServerSession::CreateSessionPair(KernelCore& kernel,
client_session->parent = parent;
server_session->parent = parent;
return std::make_tuple(std::move(server_session), std::move(client_session));
return std::make_pair(std::move(server_session), std::move(client_session));
}
} // namespace Kernel

View File

@@ -6,6 +6,7 @@
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "core/hle/kernel/object.h"
@@ -41,6 +42,10 @@ public:
return "ServerSession";
}
std::string GetName() const override {
return name;
}
static const HandleType HANDLE_TYPE = HandleType::ServerSession;
HandleType GetHandleType() const override {
return HANDLE_TYPE;
@@ -54,7 +59,7 @@ public:
return parent.get();
}
using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>;
using SessionPair = std::pair<SharedPtr<ServerSession>, SharedPtr<ClientSession>>;
/**
* Creates a pair of ServerSession and an associated ClientSession.

View File

@@ -1339,6 +1339,20 @@ static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_var
"called mutex_addr={:X}, condition_variable_addr={:X}, thread_handle=0x{:08X}, timeout={}",
mutex_addr, condition_variable_addr, thread_handle, nano_seconds);
if (Memory::IsKernelVirtualAddress(mutex_addr)) {
LOG_ERROR(
Kernel_SVC,
"Given mutex address must not be within the kernel address space. address=0x{:016X}",
mutex_addr);
return ERR_INVALID_ADDRESS_STATE;
}
if (!Common::IsWordAligned(mutex_addr)) {
LOG_ERROR(Kernel_SVC, "Given mutex address must be word-aligned. address=0x{:016X}",
mutex_addr);
return ERR_INVALID_ADDRESS;
}
auto* const current_process = Core::System::GetInstance().Kernel().CurrentProcess();
const auto& handle_table = current_process->GetHandleTable();
SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);

View File

@@ -119,10 +119,6 @@ union ResultCode {
BitField<0, 9, ErrorModule> module;
BitField<9, 13, u32> description;
// The last bit of `level` is checked by apps and the kernel to determine if a result code is an
// error
BitField<31, 1, u32> is_error;
constexpr explicit ResultCode(u32 raw) : raw(raw) {}
constexpr ResultCode(ErrorModule module_, u32 description_)

View File

@@ -363,8 +363,6 @@ void ISelfController::SetPerformanceModeChangedNotification(Kernel::HLERequestCo
void ISelfController::SetFocusHandlingMode(Kernel::HLERequestContext& ctx) {
// Takes 3 input u8s with each field located immediately after the previous
// u8, these are bool flags. No output.
LOG_WARNING(Service_AM, "(STUBBED) called");
IPC::RequestParser rp{ctx};
struct FocusHandlingModeParams {
@@ -372,7 +370,10 @@ void ISelfController::SetFocusHandlingMode(Kernel::HLERequestContext& ctx) {
u8 unknown1;
u8 unknown2;
};
auto flags = rp.PopRaw<FocusHandlingModeParams>();
const auto flags = rp.PopRaw<FocusHandlingModeParams>();
LOG_WARNING(Service_AM, "(STUBBED) called. unknown0={}, unknown1={}, unknown2={}",
flags.unknown0, flags.unknown1, flags.unknown2);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);

View File

@@ -2,9 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/logging/log.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/hle_ipc.h"
#include "core/hle/service/audio/audin_u.h"
namespace Service::Audio {
@@ -33,7 +30,6 @@ public:
RegisterHandlers(functions);
}
~IAudioIn() = default;
};
AudInU::AudInU() : ServiceFramework("audin:u") {

View File

@@ -150,7 +150,6 @@ private:
void GetReleasedAudioOutBufferImpl(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "called {}", ctx.Description());
IPC::RequestParser rp{ctx};
const u64 max_count{ctx.GetWriteBufferSize() / sizeof(u64)};
const auto released_buffers{audio_core.GetTagsAndReleaseBuffers(stream, max_count)};
@@ -194,12 +193,9 @@ private:
void AudOutU::ListAudioOutsImpl(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "called");
IPC::RequestParser rp{ctx};
ctx.WriteBuffer(DefaultDevice);
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push<u32>(1); // Amount of audio devices
}

View File

@@ -2,9 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/logging/log.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/hle_ipc.h"
#include "core/hle/service/audio/audrec_u.h"
namespace Service::Audio {
@@ -30,7 +27,6 @@ public:
RegisterHandlers(functions);
}
~IFinalOutputRecorder() = default;
};
AudRecU::AudRecU() : ServiceFramework("audrec:u") {

View File

@@ -10,6 +10,7 @@
#include "common/alignment.h"
#include "common/common_funcs.h"
#include "common/logging/log.h"
#include "common/string_util.h"
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/hle_ipc.h"
@@ -184,7 +185,6 @@ public:
private:
void ListAudioDeviceName(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_Audio, "(STUBBED) called");
IPC::RequestParser rp{ctx};
constexpr std::array<char, 15> audio_interface{{"AudioInterface"}};
ctx.WriteBuffer(audio_interface);
@@ -195,13 +195,13 @@ private:
}
void SetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_Audio, "(STUBBED) called");
IPC::RequestParser rp{ctx};
f32 volume = static_cast<f32>(rp.Pop<u32>());
const f32 volume = rp.Pop<f32>();
auto file_buffer = ctx.ReadBuffer();
auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0');
const auto device_name_buffer = ctx.ReadBuffer();
const std::string name = Common::StringFromBuffer(device_name_buffer);
LOG_WARNING(Service_Audio, "(STUBBED) called. name={}, volume={}", name, volume);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
@@ -209,7 +209,6 @@ private:
void GetActiveAudioDeviceName(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_Audio, "(STUBBED) called");
IPC::RequestParser rp{ctx};
constexpr std::array<char, 12> audio_interface{{"AudioDevice"}};
ctx.WriteBuffer(audio_interface);

View File

@@ -197,13 +197,16 @@ ResultCode VfsDirectoryServiceWrapper::RenameDirectory(const std::string& src_pa
ResultVal<FileSys::VirtualFile> VfsDirectoryServiceWrapper::OpenFile(const std::string& path_,
FileSys::Mode mode) const {
std::string path(FileUtil::SanitizePath(path_));
auto npath = path;
while (npath.size() > 0 && (npath[0] == '/' || npath[0] == '\\'))
npath = npath.substr(1);
const std::string path(FileUtil::SanitizePath(path_));
std::string_view npath = path;
while (!npath.empty() && (npath[0] == '/' || npath[0] == '\\')) {
npath.remove_prefix(1);
}
auto file = backing->GetFileRelative(npath);
if (file == nullptr)
if (file == nullptr) {
return FileSys::ERROR_PATH_NOT_FOUND;
}
if (mode == FileSys::Mode::Append) {
return MakeResult<FileSys::VirtualFile>(
@@ -319,15 +322,15 @@ ResultVal<FileSys::VirtualFile> OpenRomFS(u64 title_id, FileSys::StorageId stora
}
ResultVal<FileSys::VirtualDir> OpenSaveData(FileSys::SaveDataSpaceId space,
FileSys::SaveDataDescriptor save_struct) {
const FileSys::SaveDataDescriptor& descriptor) {
LOG_TRACE(Service_FS, "Opening Save Data for space_id={:01X}, save_struct={}",
static_cast<u8>(space), save_struct.DebugInfo());
static_cast<u8>(space), descriptor.DebugInfo());
if (save_data_factory == nullptr) {
return FileSys::ERROR_ENTITY_NOT_FOUND;
}
return save_data_factory->Open(space, save_struct);
return save_data_factory->Open(space, descriptor);
}
ResultVal<FileSys::VirtualDir> OpenSaveDataSpace(FileSys::SaveDataSpaceId space) {

View File

@@ -46,7 +46,7 @@ ResultVal<FileSys::VirtualFile> OpenRomFSCurrentProcess();
ResultVal<FileSys::VirtualFile> OpenRomFS(u64 title_id, FileSys::StorageId storage_id,
FileSys::ContentRecordType type);
ResultVal<FileSys::VirtualDir> OpenSaveData(FileSys::SaveDataSpaceId space,
FileSys::SaveDataDescriptor save_struct);
const FileSys::SaveDataDescriptor& descriptor);
ResultVal<FileSys::VirtualDir> OpenSaveDataSpace(FileSys::SaveDataSpaceId space);
ResultVal<FileSys::VirtualDir> OpenSDMC();

View File

@@ -315,61 +315,53 @@ public:
void CreateFile(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
auto file_buffer = ctx.ReadBuffer();
std::string name = Common::StringFromBuffer(file_buffer);
const auto file_buffer = ctx.ReadBuffer();
const std::string name = Common::StringFromBuffer(file_buffer);
u64 mode = rp.Pop<u64>();
u32 size = rp.Pop<u32>();
const u64 mode = rp.Pop<u64>();
const u32 size = rp.Pop<u32>();
LOG_DEBUG(Service_FS, "called file {} mode 0x{:X} size 0x{:08X}", name, mode, size);
LOG_DEBUG(Service_FS, "called. file={}, mode=0x{:X}, size=0x{:08X}", name, mode, size);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(backend.CreateFile(name, size));
}
void DeleteFile(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto file_buffer = ctx.ReadBuffer();
const std::string name = Common::StringFromBuffer(file_buffer);
auto file_buffer = ctx.ReadBuffer();
std::string name = Common::StringFromBuffer(file_buffer);
LOG_DEBUG(Service_FS, "called file {}", name);
LOG_DEBUG(Service_FS, "called. file={}", name);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(backend.DeleteFile(name));
}
void CreateDirectory(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto file_buffer = ctx.ReadBuffer();
const std::string name = Common::StringFromBuffer(file_buffer);
auto file_buffer = ctx.ReadBuffer();
std::string name = Common::StringFromBuffer(file_buffer);
LOG_DEBUG(Service_FS, "called directory {}", name);
LOG_DEBUG(Service_FS, "called. directory={}", name);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(backend.CreateDirectory(name));
}
void DeleteDirectory(Kernel::HLERequestContext& ctx) {
const IPC::RequestParser rp{ctx};
const auto file_buffer = ctx.ReadBuffer();
std::string name = Common::StringFromBuffer(file_buffer);
const std::string name = Common::StringFromBuffer(file_buffer);
LOG_DEBUG(Service_FS, "called directory {}", name);
LOG_DEBUG(Service_FS, "called. directory={}", name);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(backend.DeleteDirectory(name));
}
void DeleteDirectoryRecursively(Kernel::HLERequestContext& ctx) {
const IPC::RequestParser rp{ctx};
const auto file_buffer = ctx.ReadBuffer();
std::string name = Common::StringFromBuffer(file_buffer);
const std::string name = Common::StringFromBuffer(file_buffer);
LOG_DEBUG(Service_FS, "called directory {}", name);
LOG_DEBUG(Service_FS, "called. directory={}", name);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(backend.DeleteDirectoryRecursively(name));
@@ -386,18 +378,16 @@ public:
}
void RenameFile(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
std::vector<u8> buffer;
buffer.resize(ctx.BufferDescriptorX()[0].Size());
Memory::ReadBlock(ctx.BufferDescriptorX()[0].Address(), buffer.data(), buffer.size());
std::string src_name = Common::StringFromBuffer(buffer);
const std::string src_name = Common::StringFromBuffer(buffer);
buffer.resize(ctx.BufferDescriptorX()[1].Size());
Memory::ReadBlock(ctx.BufferDescriptorX()[1].Address(), buffer.data(), buffer.size());
std::string dst_name = Common::StringFromBuffer(buffer);
const std::string dst_name = Common::StringFromBuffer(buffer);
LOG_DEBUG(Service_FS, "called file '{}' to file '{}'", src_name, dst_name);
LOG_DEBUG(Service_FS, "called. file '{}' to file '{}'", src_name, dst_name);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(backend.RenameFile(src_name, dst_name));
@@ -406,12 +396,12 @@ public:
void OpenFile(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
auto file_buffer = ctx.ReadBuffer();
std::string name = Common::StringFromBuffer(file_buffer);
const auto file_buffer = ctx.ReadBuffer();
const std::string name = Common::StringFromBuffer(file_buffer);
auto mode = static_cast<FileSys::Mode>(rp.Pop<u32>());
const auto mode = static_cast<FileSys::Mode>(rp.Pop<u32>());
LOG_DEBUG(Service_FS, "called file {} mode {}", name, static_cast<u32>(mode));
LOG_DEBUG(Service_FS, "called. file={}, mode={}", name, static_cast<u32>(mode));
auto result = backend.OpenFile(name, mode);
if (result.Failed()) {
@@ -430,13 +420,13 @@ public:
void OpenDirectory(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
auto file_buffer = ctx.ReadBuffer();
std::string name = Common::StringFromBuffer(file_buffer);
const auto file_buffer = ctx.ReadBuffer();
const std::string name = Common::StringFromBuffer(file_buffer);
// TODO(Subv): Implement this filter.
u32 filter_flags = rp.Pop<u32>();
const u32 filter_flags = rp.Pop<u32>();
LOG_DEBUG(Service_FS, "called directory {} filter {}", name, filter_flags);
LOG_DEBUG(Service_FS, "called. directory={}, filter={}", name, filter_flags);
auto result = backend.OpenDirectory(name);
if (result.Failed()) {
@@ -453,12 +443,10 @@ public:
}
void GetEntryType(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto file_buffer = ctx.ReadBuffer();
const std::string name = Common::StringFromBuffer(file_buffer);
auto file_buffer = ctx.ReadBuffer();
std::string name = Common::StringFromBuffer(file_buffer);
LOG_DEBUG(Service_FS, "called file {}", name);
LOG_DEBUG(Service_FS, "called. file={}", name);
auto result = backend.GetEntryType(name);
if (result.Failed()) {
@@ -616,7 +604,9 @@ private:
u64_le save_id;
u64_le title_id;
u64_le save_image_size;
INSERT_PADDING_BYTES(0x28);
u16_le index;
FileSys::SaveDataRank rank;
INSERT_PADDING_BYTES(0x25);
};
static_assert(sizeof(SaveDataInfo) == 0x60, "SaveDataInfo has incorrect size.");
@@ -779,16 +769,17 @@ void FSP_SRV::CreateSaveDataFileSystem(Kernel::HLERequestContext& ctx) {
}
void FSP_SRV::OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx) {
LOG_INFO(Service_FS, "called.");
struct Parameters {
FileSys::SaveDataSpaceId save_data_space_id;
FileSys::SaveDataDescriptor descriptor;
};
IPC::RequestParser rp{ctx};
const auto parameters = rp.PopRaw<Parameters>();
auto space_id = rp.PopRaw<FileSys::SaveDataSpaceId>();
auto unk = rp.Pop<u32>();
LOG_INFO(Service_FS, "called with unknown={:08X}", unk);
auto save_struct = rp.PopRaw<FileSys::SaveDataDescriptor>();
auto dir = OpenSaveData(space_id, save_struct);
auto dir = OpenSaveData(parameters.save_data_space_id, parameters.descriptor);
if (dir.Failed()) {
IPC::ResponseBuilder rb{ctx, 2, 0, 0};
rb.Push(FileSys::ERROR_ENTITY_NOT_FOUND);

View File

@@ -18,7 +18,7 @@ class nvmap;
class nvdisp_disp0 final : public nvdevice {
public:
explicit nvdisp_disp0(std::shared_ptr<nvmap> nvmap_dev);
~nvdisp_disp0();
~nvdisp_disp0() override;
u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;

View File

@@ -17,7 +17,7 @@ namespace Service::Nvidia {
class NVDRV final : public ServiceFramework<NVDRV> {
public:
NVDRV(std::shared_ptr<Module> nvdrv, const char* name);
~NVDRV();
~NVDRV() override;
private:
void Open(Kernel::HLERequestContext& ctx);

View File

@@ -11,7 +11,7 @@ namespace Service::Nvidia {
class NVMEMP final : public ServiceFramework<NVMEMP> {
public:
NVMEMP();
~NVMEMP();
~NVMEMP() override;
private:
void Cmd0(Kernel::HLERequestContext& ctx);

View File

@@ -21,12 +21,13 @@
#include "core/hle/service/vi/display/vi_display.h"
#include "core/hle/service/vi/layer/vi_layer.h"
#include "core/perf_stats.h"
#include "core/settings.h"
#include "video_core/renderer_base.h"
namespace Service::NVFlinger {
constexpr std::size_t SCREEN_REFRESH_RATE = 60;
constexpr s64 frame_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
constexpr s64 frame_ticks = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 60);
constexpr s64 frame_ticks_30fps = static_cast<s64>(Core::Timing::BASE_CLOCK_RATE / 30);
NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
displays.emplace_back(0, "Default");
@@ -36,13 +37,15 @@ NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_t
displays.emplace_back(4, "Null");
// Schedule the screen composition events
composition_event =
core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, s64 cycles_late) {
const auto ticks = Settings::values.force_30fps_mode ? frame_ticks_30fps : frame_ticks;
composition_event = core_timing.RegisterEvent(
"ScreenComposition", [this, ticks](u64 userdata, s64 cycles_late) {
Compose();
this->core_timing.ScheduleEvent(frame_ticks - cycles_late, composition_event);
this->core_timing.ScheduleEvent(ticks - cycles_late, composition_event);
});
core_timing.ScheduleEvent(frame_ticks, composition_event);
core_timing.ScheduleEvent(ticks, composition_event);
}
NVFlinger::~NVFlinger() {
@@ -62,6 +65,7 @@ std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
const auto itr =
std::find_if(displays.begin(), displays.end(),
[&](const VI::Display& display) { return display.GetName() == name; });
if (itr == displays.end()) {
return {};
}

View File

@@ -90,7 +90,7 @@ private:
Kernel::HLERequestContext& ctx);
ServiceFrameworkBase(const char* service_name, u32 max_sessions, InvokerFn* handler_invoker);
~ServiceFrameworkBase();
~ServiceFrameworkBase() override;
void RegisterHandlersBase(const FunctionInfoBase* functions, std::size_t n);
void ReportUnimplementedFunction(Kernel::HLERequestContext& ctx, const FunctionInfoBase* info);

View File

@@ -11,7 +11,7 @@ namespace Service::Set {
class SET_CAL final : public ServiceFramework<SET_CAL> {
public:
explicit SET_CAL();
~SET_CAL();
~SET_CAL() override;
};
} // namespace Service::Set

View File

@@ -8,12 +8,20 @@
namespace Service::Sockets {
void SFDNSRES::GetAddrInfo(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
struct Parameters {
u8 use_nsd_resolve;
u32 unknown;
u64 process_id;
};
LOG_WARNING(Service, "(STUBBED) called");
IPC::RequestParser rp{ctx};
const auto parameters = rp.PopRaw<Parameters>();
LOG_WARNING(Service,
"(STUBBED) called. use_nsd_resolve={}, unknown=0x{:08X}, process_id=0x{:016X}",
parameters.use_nsd_resolve, parameters.unknown, parameters.process_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}

View File

@@ -26,9 +26,7 @@ Module::Interface::~Interface() = default;
void Module::Interface::GetRandomBytes(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_SPL, "called");
IPC::RequestParser rp{ctx};
std::size_t size = ctx.GetWriteBufferSize();
const std::size_t size = ctx.GetWriteBufferSize();
std::uniform_int_distribution<u16> distribution(0, std::numeric_limits<u8>::max());
std::vector<u8> data(size);

View File

@@ -64,13 +64,19 @@ public:
};
RegisterHandlers(functions);
}
~ISslContext() = default;
private:
void SetOption(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_SSL, "(STUBBED) called");
struct Parameters {
u8 enable;
u32 option;
};
IPC::RequestParser rp{ctx};
const auto parameters = rp.PopRaw<Parameters>();
LOG_WARNING(Service_SSL, "(STUBBED) called. enable={}, option={}", parameters.enable,
parameters.option);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);

View File

@@ -498,7 +498,6 @@ public:
};
RegisterHandlers(functions);
}
~IHOSBinderDriver() = default;
private:
enum class TransactionId {
@@ -692,7 +691,6 @@ public:
};
RegisterHandlers(functions);
}
~ISystemDisplayService() = default;
private:
void SetLayerZ(Kernel::HLERequestContext& ctx) {
@@ -818,7 +816,6 @@ public:
};
RegisterHandlers(functions);
}
~IManagerDisplayService() = default;
private:
void CloseDisplay(Kernel::HLERequestContext& ctx) {
@@ -884,7 +881,6 @@ private:
class IApplicationDisplayService final : public ServiceFramework<IApplicationDisplayService> {
public:
explicit IApplicationDisplayService(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
~IApplicationDisplayService() = default;
private:
enum class ConvertedScaleMode : u64 {
@@ -1037,7 +1033,6 @@ private:
void ListDisplays(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_VI, "(STUBBED) called");
IPC::RequestParser rp{ctx};
DisplayInfo display_info;
display_info.width *= static_cast<u64>(Settings::values.resolution_factor);
display_info.height *= static_cast<u64>(Settings::values.resolution_factor);

View File

@@ -22,7 +22,7 @@ class AppLoader_NCA;
class AppLoader_XCI final : public AppLoader {
public:
explicit AppLoader_XCI(FileSys::VirtualFile file);
~AppLoader_XCI();
~AppLoader_XCI() override;
/**
* Returns the type of the file

View File

@@ -38,10 +38,6 @@ void SetCurrentPageTable(Common::PageTable* page_table) {
}
}
Common::PageTable* GetCurrentPageTable() {
return current_page_table;
}
static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory,
Common::PageType type) {
LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE,

View File

@@ -28,16 +28,6 @@ constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
/// Virtual user-space memory regions
enum : VAddr {
/// Read-only page containing kernel and system configuration values.
CONFIG_MEMORY_VADDR = 0x1FF80000,
CONFIG_MEMORY_SIZE = 0x00001000,
CONFIG_MEMORY_VADDR_END = CONFIG_MEMORY_VADDR + CONFIG_MEMORY_SIZE,
/// Usually read-only page containing mostly values read from hardware.
SHARED_PAGE_VADDR = 0x1FF81000,
SHARED_PAGE_SIZE = 0x00001000,
SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE,
/// TLS (Thread-Local Storage) related.
TLS_ENTRY_SIZE = 0x200,
@@ -50,9 +40,8 @@ enum : VAddr {
KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE,
};
/// Currently active page table
/// Changes the currently active page table.
void SetCurrentPageTable(Common::PageTable* page_table);
Common::PageTable* GetCurrentPageTable();
/// Determines if the given VAddr is valid for the specified process.
bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr);

View File

@@ -393,6 +393,7 @@ struct Values {
bool use_disk_shader_cache;
bool use_accurate_gpu_emulation;
bool use_asynchronous_gpu_emulation;
bool force_30fps_mode;
float bg_red;
float bg_green;

View File

@@ -17,7 +17,6 @@ TestEnvironment::TestEnvironment(bool mutable_memory_)
: mutable_memory(mutable_memory_),
test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} {
auto process = Kernel::Process::Create(Core::System::GetInstance(), "");
kernel.MakeCurrentProcess(process.get());
page_table = &process->VMManager().page_table;
std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
@@ -28,7 +27,7 @@ TestEnvironment::TestEnvironment(bool mutable_memory_)
Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory);
Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory);
Memory::SetCurrentPageTable(page_table);
kernel.MakeCurrentProcess(process.get());
}
TestEnvironment::~TestEnvironment() {

View File

@@ -6,12 +6,13 @@
#include "common/logging/log.h"
#include "common/math_util.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
namespace Tegra::Engines {
Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
: memory_manager(memory_manager), rasterizer{rasterizer} {}
: rasterizer{rasterizer}, memory_manager{memory_manager} {}
void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
ASSERT_MSG(method_call.method < Regs::NUM_REGS,

View File

@@ -10,7 +10,10 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace Tegra {
class MemoryManager;
}
namespace VideoCore {
class RasterizerInterface;
@@ -115,10 +118,9 @@ public:
};
} regs{};
MemoryManager& memory_manager;
private:
VideoCore::RasterizerInterface& rasterizer;
MemoryManager& memory_manager;
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.

View File

@@ -9,7 +9,10 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace Tegra {
class MemoryManager;
}
namespace Tegra::Engines {
@@ -40,10 +43,11 @@ public:
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
"KeplerCompute Regs has wrong size");
MemoryManager& memory_manager;
/// Write the value to the register identified by method.
void CallMethod(const GPU::MethodCall& method_call);
private:
MemoryManager& memory_manager;
};
#define ASSERT_REG_POSITION(field_name, position) \

View File

@@ -5,9 +5,9 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
@@ -15,7 +15,7 @@ namespace Tegra::Engines {
KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
: system{system}, memory_manager(memory_manager), rasterizer{rasterizer} {}
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
KeplerMemory::~KeplerMemory() = default;

View File

@@ -10,12 +10,15 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace Core {
class System;
}
namespace Tegra {
class MemoryManager;
}
namespace VideoCore {
class RasterizerInterface;
}
@@ -82,8 +85,8 @@ public:
private:
Core::System& system;
MemoryManager& memory_manager;
VideoCore::RasterizerInterface& rasterizer;
MemoryManager& memory_manager;
void ProcessData(u32 data);
};

View File

@@ -7,11 +7,10 @@
#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/memory.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
#include "video_core/textures/texture.h"
namespace Tegra::Engines {
@@ -21,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00;
Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
: memory_manager(memory_manager), system{system}, rasterizer{rasterizer},
macro_interpreter(*this) {
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{
*this} {
InitializeRegisterDefaults();
}
@@ -250,6 +249,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
ProcessQueryGet();
break;
}
case MAXWELL3D_REG_INDEX(sync_info): {
ProcessSyncPoint();
break;
}
default:
break;
}
@@ -327,6 +330,14 @@ void Maxwell3D::ProcessQueryGet() {
}
}
void Maxwell3D::ProcessSyncPoint() {
const u32 sync_point = regs.sync_info.sync_point.Value();
const u32 increment = regs.sync_info.increment.Value();
const u32 cache_flush = regs.sync_info.unknown.Value();
LOG_DEBUG(HW_GPU, "Syncpoint set {}, increment: {}, unk: {}", sync_point, increment,
cache_flush);
}
void Maxwell3D::DrawArrays() {
LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
regs.vertex_buffer.count);

View File

@@ -16,13 +16,16 @@
#include "common/math_util.h"
#include "video_core/gpu.h"
#include "video_core/macro_interpreter.h"
#include "video_core/memory_manager.h"
#include "video_core/textures/texture.h"
namespace Core {
class System;
}
namespace Tegra {
class MemoryManager;
}
namespace VideoCore {
class RasterizerInterface;
}
@@ -576,7 +579,17 @@ public:
u32 bind;
} macros;
INSERT_PADDING_WORDS(0x188);
INSERT_PADDING_WORDS(0x69);
struct {
union {
BitField<0, 16, u32> sync_point;
BitField<16, 1, u32> unknown;
BitField<20, 1, u32> increment;
};
} sync_info;
INSERT_PADDING_WORDS(0x11E);
u32 tfb_enabled;
@@ -1093,7 +1106,6 @@ public:
};
State state{};
MemoryManager& memory_manager;
struct DirtyFlags {
std::bitset<8> color_buffer{0xFF};
@@ -1141,6 +1153,8 @@ private:
VideoCore::RasterizerInterface& rasterizer;
MemoryManager& memory_manager;
/// Start offsets of each macro in macro_memory
std::unordered_map<u32, u32> macro_offsets;
@@ -1180,6 +1194,9 @@ private:
/// Handles a write to the QUERY_GET register.
void ProcessQueryGet();
/// Handles writes to syncing register.
void ProcessSyncPoint();
/// Handles a write to the CB_DATA[i] register.
void ProcessCBData(u32 value);
@@ -1195,6 +1212,7 @@ private:
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(macros, 0x45);
ASSERT_REG_POSITION(sync_info, 0xB2);
ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
ASSERT_REG_POSITION(rt, 0x200);
ASSERT_REG_POSITION(viewport_transform, 0x280);

View File

@@ -5,9 +5,9 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
#include "video_core/textures/decoders.h"
@@ -16,7 +16,7 @@ namespace Tegra::Engines {
MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
: memory_manager(memory_manager), system{system}, rasterizer{rasterizer} {}
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
ASSERT_MSG(method_call.method < Regs::NUM_REGS,

View File

@@ -10,12 +10,15 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace Core {
class System;
}
namespace Tegra {
class MemoryManager;
}
namespace VideoCore {
class RasterizerInterface;
}
@@ -139,13 +142,13 @@ public:
};
} regs{};
MemoryManager& memory_manager;
private:
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
MemoryManager& memory_manager;
/// Performs the copy from the source buffer to the destination buffer as configured in the
/// registers.
void HandleCopy();

View File

@@ -31,7 +31,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
auto& rasterizer{renderer.Rasterizer()};
memory_manager = std::make_unique<Tegra::MemoryManager>();
memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer);
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);

View File

@@ -9,7 +9,7 @@
namespace VideoCommon {
GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
: Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {}
: Tegra::GPU(system, renderer), gpu_thread{system, renderer, *dma_pusher} {}
GPUAsynch::~GPUAsynch() = default;

View File

@@ -4,6 +4,9 @@
#include "common/assert.h"
#include "common/microprofile.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/frontend/scope_acquire_window_context.h"
#include "video_core/dma_pusher.h"
#include "video_core/gpu.h"
@@ -36,7 +39,6 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
dma_pusher.Push(std::move(submit_list->entries));
dma_pusher.DispatchCalls();
} else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
state.DecrementFramesCounter();
renderer.SwapBuffers(std::move(data->framebuffer));
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
renderer.Rasterizer().FlushRegion(data->addr, data->size);
@@ -47,13 +49,18 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
} else {
UNREACHABLE();
}
state.signaled_fence = next.fence;
state.TrySynchronize();
}
}
}
ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
: renderer{renderer}, thread{RunThread, std::ref(renderer), std::ref(dma_pusher),
std::ref(state)} {}
ThreadManager::ThreadManager(Core::System& system, VideoCore::RendererBase& renderer,
Tegra::DmaPusher& dma_pusher)
: system{system}, thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)} {
synchronization_event = system.CoreTiming().RegisterEvent(
"GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); });
}
ThreadManager::~ThreadManager() {
// Notify GPU thread that a shutdown is pending
@@ -62,14 +69,14 @@ ThreadManager::~ThreadManager() {
}
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
PushCommand(SubmitListCommand(std::move(entries)));
const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))};
const s64 synchronization_ticks{Core::Timing::usToCycles(9000)};
system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence);
}
void ThreadManager::SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
state.IncrementFramesCounter();
PushCommand(SwapBuffersCommand(std::move(framebuffer)));
state.WaitForFrames();
}
void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
@@ -79,7 +86,7 @@ void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
if (state.queue.Empty()) {
// It's quicker to invalidate a single region on the CPU if the queue is already empty
renderer.Rasterizer().InvalidateRegion(addr, size);
system.Renderer().Rasterizer().InvalidateRegion(addr, size);
} else {
PushCommand(InvalidateRegionCommand(addr, size));
}
@@ -90,9 +97,25 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
InvalidateRegion(addr, size);
}
void ThreadManager::PushCommand(CommandData&& command_data) {
state.queue.Push(CommandDataContainer(std::move(command_data)));
u64 ThreadManager::PushCommand(CommandData&& command_data) {
const u64 fence{++state.last_fence};
state.queue.Push(CommandDataContainer(std::move(command_data), fence));
state.SignalCommands();
return fence;
}
MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
void SynchState::WaitForSynchronization(u64 fence) {
if (signaled_fence >= fence) {
return;
}
// Wait for the GPU to be idle (all commands to be executed)
{
MICROPROFILE_SCOPE(GPU_wait);
std::unique_lock<std::mutex> lock{synchronization_mutex};
synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; });
}
}
} // namespace VideoCommon::GPUThread

View File

@@ -19,9 +19,12 @@ struct FramebufferConfig;
class DmaPusher;
} // namespace Tegra
namespace VideoCore {
class RendererBase;
} // namespace VideoCore
namespace Core {
class System;
namespace Timing {
struct EventType;
} // namespace Timing
} // namespace Core
namespace VideoCommon::GPUThread {
@@ -75,63 +78,47 @@ using CommandData =
struct CommandDataContainer {
CommandDataContainer() = default;
CommandDataContainer(CommandData&& data) : data{std::move(data)} {}
CommandDataContainer(CommandData&& data, u64 next_fence)
: data{std::move(data)}, fence{next_fence} {}
CommandDataContainer& operator=(const CommandDataContainer& t) {
data = std::move(t.data);
fence = t.fence;
return *this;
}
CommandData data;
u64 fence{};
};
/// Struct used to synchronize the GPU thread
struct SynchState final {
std::atomic_bool is_running{true};
std::atomic_int queued_frame_count{};
std::mutex frames_mutex;
std::mutex synchronization_mutex;
std::mutex commands_mutex;
std::condition_variable commands_condition;
std::condition_variable frames_condition;
std::condition_variable synchronization_condition;
void IncrementFramesCounter() {
std::lock_guard lock{frames_mutex};
++queued_frame_count;
/// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU
/// synchronized. This is entirely empirical.
bool IsSynchronized() const {
constexpr std::size_t max_queue_gap{5};
return queue.Size() <= max_queue_gap;
}
void DecrementFramesCounter() {
{
std::lock_guard lock{frames_mutex};
--queued_frame_count;
if (queued_frame_count) {
return;
}
}
frames_condition.notify_one();
}
void WaitForFrames() {
{
std::lock_guard lock{frames_mutex};
if (!queued_frame_count) {
return;
}
}
// Wait for the GPU to be idle (all commands to be executed)
{
std::unique_lock lock{frames_mutex};
frames_condition.wait(lock, [this] { return !queued_frame_count; });
void TrySynchronize() {
if (IsSynchronized()) {
std::lock_guard<std::mutex> lock{synchronization_mutex};
synchronization_condition.notify_one();
}
}
void WaitForSynchronization(u64 fence);
void SignalCommands() {
{
std::unique_lock lock{commands_mutex};
if (queue.Empty()) {
return;
}
if (queue.Empty()) {
return;
}
commands_condition.notify_one();
@@ -144,12 +131,15 @@ struct SynchState final {
using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
CommandQueue queue;
u64 last_fence{};
std::atomic<u64> signaled_fence{};
};
/// Class used to manage the GPU thread
class ThreadManager final {
public:
explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
explicit ThreadManager(Core::System& system, VideoCore::RendererBase& renderer,
Tegra::DmaPusher& dma_pusher);
~ThreadManager();
/// Push GPU command entries to be processed
@@ -170,11 +160,12 @@ public:
private:
/// Pushes a command to be executed by the GPU thread
void PushCommand(CommandData&& command_data);
u64 PushCommand(CommandData&& command_data);
private:
SynchState state;
VideoCore::RendererBase& renderer;
Core::System& system;
Core::Timing::EventType* synchronization_event{};
std::thread thread;
std::thread::id thread_id;
};

View File

@@ -223,27 +223,21 @@ void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 res
}
u32 MacroInterpreter::FetchParameter() {
ASSERT(next_parameter_index < parameters.size());
return parameters[next_parameter_index++];
return parameters.at(next_parameter_index++);
}
u32 MacroInterpreter::GetRegister(u32 register_id) const {
// Register 0 is supposed to always return 0.
if (register_id == 0)
return 0;
ASSERT(register_id < registers.size());
return registers[register_id];
return registers.at(register_id);
}
void MacroInterpreter::SetRegister(u32 register_id, u32 value) {
// Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
// register.
if (register_id == 0)
// Register 0 is hardwired as the zero register.
// Ensure no writes to it actually occur.
if (register_id == 0) {
return;
}
ASSERT(register_id < registers.size());
registers[register_id] = value;
registers.at(register_id) = value;
}
void MacroInterpreter::SetMethodAddress(u32 address) {

View File

@@ -5,16 +5,13 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
namespace Tegra {
MemoryManager::MemoryManager() {
MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {
std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
std::fill(page_table.attributes.begin(), page_table.attributes.end(),
Common::PageType::Unmapped);
@@ -70,23 +67,23 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
const u64 aligned_size{Common::AlignUp(size, page_size)};
const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
Core::System::GetInstance().Renderer().Rasterizer().FlushAndInvalidateRegion(cache_addr,
aligned_size);
rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size);
UnmapRange(gpu_addr, aligned_size);
return gpu_addr;
}
GPUVAddr MemoryManager::FindFreeRegion(GPUVAddr region_start, u64 size) {
GPUVAddr MemoryManager::FindFreeRegion(GPUVAddr region_start, u64 size) const {
// Find the first Free VMA.
const VMAHandle vma_handle{std::find_if(vma_map.begin(), vma_map.end(), [&](const auto& vma) {
if (vma.second.type != VirtualMemoryArea::Type::Unmapped) {
return false;
}
const VMAHandle vma_handle{
std::find_if(vma_map.begin(), vma_map.end(), [region_start, size](const auto& vma) {
if (vma.second.type != VirtualMemoryArea::Type::Unmapped) {
return false;
}
const VAddr vma_end{vma.second.base + vma.second.size};
return vma_end > region_start && vma_end >= region_start + size;
})};
const VAddr vma_end{vma.second.base + vma.second.size};
return vma_end > region_start && vma_end >= region_start + size;
})};
if (vma_handle == vma_map.end()) {
return {};
@@ -99,12 +96,12 @@ bool MemoryManager::IsAddressValid(GPUVAddr addr) const {
return (addr >> page_bits) < page_table.pointers.size();
}
std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) {
std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) const {
if (!IsAddressValid(addr)) {
return {};
}
VAddr cpu_addr{page_table.backing_addr[addr >> page_bits]};
const VAddr cpu_addr{page_table.backing_addr[addr >> page_bits]};
if (cpu_addr) {
return cpu_addr + (addr & page_mask);
}
@@ -113,7 +110,7 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) {
}
template <typename T>
T MemoryManager::Read(GPUVAddr addr) {
T MemoryManager::Read(GPUVAddr addr) const {
if (!IsAddressValid(addr)) {
return {};
}
@@ -165,10 +162,10 @@ void MemoryManager::Write(GPUVAddr addr, T data) {
}
}
template u8 MemoryManager::Read<u8>(GPUVAddr addr);
template u16 MemoryManager::Read<u16>(GPUVAddr addr);
template u32 MemoryManager::Read<u32>(GPUVAddr addr);
template u64 MemoryManager::Read<u64>(GPUVAddr addr);
template u8 MemoryManager::Read<u8>(GPUVAddr addr) const;
template u16 MemoryManager::Read<u16>(GPUVAddr addr) const;
template u32 MemoryManager::Read<u32>(GPUVAddr addr) const;
template u64 MemoryManager::Read<u64>(GPUVAddr addr) const;
template void MemoryManager::Write<u8>(GPUVAddr addr, u8 data);
template void MemoryManager::Write<u16>(GPUVAddr addr, u16 data);
template void MemoryManager::Write<u32>(GPUVAddr addr, u32 data);
@@ -179,8 +176,8 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) {
return {};
}
u8* page_pointer{page_table.pointers[addr >> page_bits]};
if (page_pointer) {
u8* const page_pointer{page_table.pointers[addr >> page_bits]};
if (page_pointer != nullptr) {
return page_pointer + (addr & page_mask);
}
@@ -188,15 +185,100 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) {
return {};
}
void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) {
std::memcpy(dest_buffer, GetPointer(src_addr), size);
const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
if (!IsAddressValid(addr)) {
return {};
}
const u8* const page_pointer{page_table.pointers[addr >> page_bits]};
if (page_pointer != nullptr) {
return page_pointer + (addr & page_mask);
}
LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
return {};
}
void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const {
std::size_t remaining_size{size};
std::size_t page_index{src_addr >> page_bits};
std::size_t page_offset{src_addr & page_mask};
while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
switch (page_table.attributes[page_index]) {
case Common::PageType::Memory: {
const u8* src_ptr{page_table.pointers[page_index] + page_offset};
rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
std::memcpy(dest_buffer, src_ptr, copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
remaining_size -= copy_amount;
}
}
void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
std::memcpy(GetPointer(dest_addr), src_buffer, size);
std::size_t remaining_size{size};
std::size_t page_index{dest_addr >> page_bits};
std::size_t page_offset{dest_addr & page_mask};
while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
switch (page_table.attributes[page_index]) {
case Common::PageType::Memory: {
u8* dest_ptr{page_table.pointers[page_index] + page_offset};
rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount);
std::memcpy(dest_ptr, src_buffer, copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
remaining_size -= copy_amount;
}
}
void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
std::memcpy(GetPointer(dest_addr), GetPointer(src_addr), size);
std::size_t remaining_size{size};
std::size_t page_index{src_addr >> page_bits};
std::size_t page_offset{src_addr & page_mask};
while (remaining_size > 0) {
const std::size_t copy_amount{
std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
switch (page_table.attributes[page_index]) {
case Common::PageType::Memory: {
const u8* src_ptr{page_table.pointers[page_index] + page_offset};
rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
WriteBlock(dest_addr, src_ptr, copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
dest_addr += static_cast<VAddr>(copy_amount);
src_addr += static_cast<VAddr>(copy_amount);
remaining_size -= copy_amount;
}
}
void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
@@ -336,7 +418,7 @@ MemoryManager::VMAIter MemoryManager::CarveVMA(GPUVAddr base, u64 size) {
const VirtualMemoryArea& vma{vma_handle->second};
if (vma.type == VirtualMemoryArea::Type::Mapped) {
// Region is already allocated
return {};
return vma_handle;
}
const VAddr start_in_vma{base - vma.base};

View File

@@ -10,6 +10,10 @@
#include "common/common_types.h"
#include "common/page_table.h"
namespace VideoCore {
class RasterizerInterface;
}
namespace Tegra {
/**
@@ -43,24 +47,25 @@ struct VirtualMemoryArea {
class MemoryManager final {
public:
MemoryManager();
MemoryManager(VideoCore::RasterizerInterface& rasterizer);
GPUVAddr AllocateSpace(u64 size, u64 align);
GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size);
GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr addr, u64 size);
GPUVAddr UnmapBuffer(GPUVAddr addr, u64 size);
std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr);
std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
template <typename T>
T Read(GPUVAddr addr);
T Read(GPUVAddr addr) const;
template <typename T>
void Write(GPUVAddr addr, T data);
u8* GetPointer(GPUVAddr addr);
const u8* GetPointer(GPUVAddr addr) const;
void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size);
void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
@@ -127,7 +132,7 @@ private:
void UpdatePageTableForVMA(const VirtualMemoryArea& vma);
/// Finds a free (unmapped region) of the specified size starting at the specified address.
GPUVAddr FindFreeRegion(GPUVAddr region_start, u64 size);
GPUVAddr FindFreeRegion(GPUVAddr region_start, u64 size) const;
private:
static constexpr u64 page_bits{16};
@@ -143,6 +148,7 @@ private:
Common::PageTable page_table{page_bits};
VMAMap vma_map;
VideoCore::RasterizerInterface& rasterizer;
};
} // namespace Tegra

View File

@@ -7,7 +7,6 @@
#include "common/alignment.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"

View File

@@ -4,7 +4,6 @@
#include <glad/glad.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "video_core/renderer_opengl/gl_global_cache.h"

View File

@@ -7,7 +7,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_primitive_assembler.h"

View File

@@ -4,11 +4,9 @@
#pragma once
#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/memory_manager.h"
namespace OpenGL {

View File

@@ -17,7 +17,6 @@
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "core/hle/kernel/process.h"
#include "core/settings.h"
#include "video_core/engines/maxwell_3d.h"
@@ -26,7 +25,6 @@
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/video_core.h"
namespace OpenGL {

View File

@@ -12,15 +12,12 @@
#include <optional>
#include <tuple>
#include <utility>
#include <vector>
#include <boost/icl/interval_map.hpp>
#include <boost/range/iterator_range.hpp>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
@@ -29,10 +26,8 @@
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
namespace Core {
class System;

View File

@@ -13,7 +13,6 @@
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/hle/kernel/process.h"
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/morton.h"

View File

@@ -5,10 +5,9 @@
#pragma once
#include <array>
#include <map>
#include <memory>
#include <string>
#include <unordered_set>
#include <tuple>
#include <vector>
#include "common/alignment.h"

View File

@@ -39,6 +39,10 @@ GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
/// Gets the shader program code from memory for the specified address
ProgramCode GetShaderCode(const u8* host_ptr) {
ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
ASSERT_OR_EXECUTE(host_ptr != nullptr, {
std::fill(program_code.begin(), program_code.end(), 0);
return program_code;
});
std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64));
return program_code;
}

View File

@@ -5,21 +5,20 @@
#pragma once
#include <array>
#include <atomic>
#include <memory>
#include <set>
#include <tuple>
#include <unordered_map>
#include <vector>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
namespace Core {
class System;

View File

@@ -21,6 +21,8 @@
namespace OpenGL::GLShader {
namespace {
using Tegra::Shader::Attribute;
using Tegra::Shader::AttributeUse;
using Tegra::Shader::Header;
@@ -34,14 +36,18 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
using Operation = const OperationNode&;
enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
struct TextureAoffi {};
using TextureArgument = std::pair<Type, Node>;
using TextureIR = std::variant<TextureAoffi, TextureArgument>;
enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
class ShaderWriter {
public:
void AddExpression(std::string_view text) {
@@ -69,10 +75,10 @@ public:
shader_source += '\n';
}
std::string GenerateTemporal() {
std::string temporal = "tmp";
temporal += std::to_string(temporal_index++);
return temporal;
std::string GenerateTemporary() {
std::string temporary = "tmp";
temporary += std::to_string(temporary_index++);
return temporary;
}
std::string GetResult() {
@@ -87,11 +93,11 @@ private:
}
std::string shader_source;
u32 temporal_index = 1;
u32 temporary_index = 1;
};
/// Generates code to use for a swizzle operation.
static std::string GetSwizzle(u32 elem) {
std::string GetSwizzle(u32 elem) {
ASSERT(elem <= 3);
std::string swizzle = ".";
swizzle += "xyzw"[elem];
@@ -99,7 +105,7 @@ static std::string GetSwizzle(u32 elem) {
}
/// Translate topology
static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
switch (topology) {
case Tegra::Shader::OutputTopology::PointList:
return "points";
@@ -114,7 +120,7 @@ static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
}
/// Returns true if an object has to be treated as precise
static bool IsPrecise(Operation operand) {
bool IsPrecise(Operation operand) {
const auto& meta = operand.GetMeta();
if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
@@ -126,7 +132,7 @@ static bool IsPrecise(Operation operand) {
return false;
}
static bool IsPrecise(Node node) {
bool IsPrecise(Node node) {
if (const auto operation = std::get_if<OperationNode>(node)) {
return IsPrecise(*operation);
}
@@ -426,9 +432,14 @@ private:
std::string Visit(Node node) {
if (const auto operation = std::get_if<OperationNode>(node)) {
const auto operation_index = static_cast<std::size_t>(operation->GetCode());
if (operation_index >= operation_decompilers.size()) {
UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
return {};
}
const auto decompiler = operation_decompilers[operation_index];
if (decompiler == nullptr) {
UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index);
UNREACHABLE_MSG("Undefined operation: {}", operation_index);
return {};
}
return (this->*decompiler)(*operation);
@@ -540,7 +551,7 @@ private:
} else if (std::holds_alternative<OperationNode>(*offset)) {
// Indirect access
const std::string final_offset = code.GenerateTemporal();
const std::string final_offset = code.GenerateTemporary();
code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4) & " +
std::to_string(MAX_CONSTBUFFER_ELEMENTS - 1) + ';');
return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()),
@@ -587,9 +598,9 @@ private:
// There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders
const std::string precise = stage != ShaderStage::Fragment ? "precise " : "";
const std::string temporal = code.GenerateTemporal();
code.AddLine(precise + "float " + temporal + " = " + value + ';');
return temporal;
const std::string temporary = code.GenerateTemporary();
code.AddLine(precise + "float " + temporary + " = " + value + ';');
return temporary;
}
std::string VisitOperand(Operation operation, std::size_t operand_index) {
@@ -601,9 +612,9 @@ private:
return Visit(operand);
}
const std::string temporal = code.GenerateTemporal();
code.AddLine("float " + temporal + " = " + Visit(operand) + ';');
return temporal;
const std::string temporary = code.GenerateTemporary();
code.AddLine("float " + temporary + " = " + Visit(operand) + ';');
return temporary;
}
std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
@@ -718,8 +729,8 @@ private:
result_type));
}
std::string GenerateTexture(Operation operation, const std::string& func,
const std::vector<std::pair<Type, Node>>& extras) {
std::string GenerateTexture(Operation operation, const std::string& function_suffix,
const std::vector<TextureIR>& extras) {
constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -729,11 +740,11 @@ private:
const bool has_array = meta->sampler.IsArray();
const bool has_shadow = meta->sampler.IsShadow();
std::string expr = func;
expr += '(';
expr += GetSampler(meta->sampler);
expr += ", ";
std::string expr = "texture" + function_suffix;
if (!meta->aoffi.empty()) {
expr += "Offset";
}
expr += '(' + GetSampler(meta->sampler) + ", ";
expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
expr += '(';
for (std::size_t i = 0; i < count; ++i) {
@@ -751,38 +762,76 @@ private:
}
expr += ')';
for (const auto& extra_pair : extras) {
const auto [type, operand] = extra_pair;
if (operand == nullptr) {
continue;
}
expr += ", ";
switch (type) {
case Type::Int:
if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
// Inline the string as an immediate integer in GLSL (some extra arguments are
// required to be constant)
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
} else {
expr += "ftoi(" + Visit(operand) + ')';
}
break;
case Type::Float:
expr += Visit(operand);
break;
default: {
const auto type_int = static_cast<u32>(type);
UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
expr += '0';
break;
}
for (const auto& variant : extras) {
if (const auto argument = std::get_if<TextureArgument>(&variant)) {
expr += GenerateTextureArgument(*argument);
} else if (std::get_if<TextureAoffi>(&variant)) {
expr += GenerateTextureAoffi(meta->aoffi);
} else {
UNREACHABLE();
}
}
return expr + ')';
}
std::string GenerateTextureArgument(TextureArgument argument) {
const auto [type, operand] = argument;
if (operand == nullptr) {
return {};
}
std::string expr = ", ";
switch (type) {
case Type::Int:
if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
// Inline the string as an immediate integer in GLSL (some extra arguments are
// required to be constant)
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
} else {
expr += "ftoi(" + Visit(operand) + ')';
}
break;
case Type::Float:
expr += Visit(operand);
break;
default: {
const auto type_int = static_cast<u32>(type);
UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
expr += '0';
break;
}
}
return expr;
}
std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
if (aoffi.empty()) {
return {};
}
constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"};
std::string expr = ", ";
expr += coord_constructors.at(aoffi.size() - 1);
expr += '(';
for (std::size_t index = 0; index < aoffi.size(); ++index) {
const auto operand{aoffi.at(index)};
if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
// Inline the string as an immediate integer in GLSL (AOFFI arguments are required
// to be constant by the standard).
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
} else {
expr += "ftoi(" + Visit(operand) + ')';
}
if (index + 1 < aoffi.size()) {
expr += ", ";
}
}
expr += ')';
return expr;
}
std::string Assign(Operation operation) {
const Node dest = operation[0];
const Node src = operation[1];
@@ -1159,7 +1208,8 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}});
std::string expr = GenerateTexture(
operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}});
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
@@ -1170,7 +1220,8 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}});
std::string expr = GenerateTexture(
operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}});
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
@@ -1182,7 +1233,8 @@ private:
ASSERT(meta);
const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
return GenerateTexture(operation, "textureGather", {{type, meta->component}}) +
return GenerateTexture(operation, "Gather",
{TextureArgument{type, meta->component}, TextureAoffi{}}) +
GetSwizzle(meta->element);
}
@@ -1196,11 +1248,12 @@ private:
switch (meta->element) {
case 0:
case 1:
return "textureSize(" + sampler + ", " + lod + ')' + GetSwizzle(meta->element);
return "itof(int(textureSize(" + sampler + ", " + lod + ')' +
GetSwizzle(meta->element) + "))";
case 2:
return "0";
case 3:
return "textureQueryLevels(" + sampler + ')';
return "itof(textureQueryLevels(" + sampler + "))";
}
UNREACHABLE();
return "0";
@@ -1211,8 +1264,8 @@ private:
ASSERT(meta);
if (meta->element < 2) {
return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) +
" * vec2(256))" + GetSwizzle(meta->element) + "))";
return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" +
GetSwizzle(meta->element) + "))";
}
return "0";
}
@@ -1565,6 +1618,8 @@ private:
ShaderWriter code;
};
} // Anonymous namespace
std::string GetCommonDeclarations() {
const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);

View File

@@ -5,7 +5,6 @@
#pragma once
#include <array>
#include <set>
#include <string>
#include <utility>
#include <vector>

View File

@@ -10,8 +10,8 @@
#include "common/common_types.h"
#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/lz4_compression.h"
#include "common/scm_rev.h"
#include "common/zstd_compression.h"
#include "core/core.h"
#include "core/hle/kernel/process.h"
@@ -259,7 +259,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
return {};
}
dump.binary = Common::Compression::DecompressDataLZ4(compressed_binary, binary_length);
dump.binary = Common::Compression::DecompressDataZSTD(compressed_binary);
if (dump.binary.empty()) {
return {};
}
@@ -288,7 +288,7 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
return {};
}
const std::vector<u8> code = Common::Compression::DecompressDataLZ4(compressed_code, code_size);
const std::vector<u8> code = Common::Compression::DecompressDataZSTD(compressed_code);
if (code.empty()) {
return {};
}
@@ -474,8 +474,8 @@ void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::str
if (!IsUsable())
return;
const std::vector<u8> compressed_code{Common::Compression::CompressDataLZ4HC(
reinterpret_cast<const u8*>(code.data()), code.size(), 9)};
const std::vector<u8> compressed_code{Common::Compression::CompressDataZSTDDefault(
reinterpret_cast<const u8*>(code.data()), code.size())};
if (compressed_code.empty()) {
LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}",
unique_identifier);
@@ -506,7 +506,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
const std::vector<u8> compressed_binary =
Common::Compression::CompressDataLZ4HC(binary.data(), binary.size(), 9);
Common::Compression::CompressDataZSTDDefault(binary.data(), binary.size());
if (compressed_binary.empty()) {
LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}",

View File

@@ -3,7 +3,6 @@
// Refer to the license.txt file included.
#include <fmt/format.h>
#include "common/assert.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"

View File

@@ -4,12 +4,9 @@
#pragma once
#include <array>
#include <string>
#include <vector>
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/shader/shader_ir.h"

View File

@@ -62,7 +62,6 @@ public:
UpdatePipeline();
state.draw.shader_program = 0;
state.draw.program_pipeline = pipeline.handle;
state.geometry_shaders.enabled = (gs != 0);
}
private:

View File

@@ -10,16 +10,62 @@
namespace OpenGL {
OpenGLState OpenGLState::cur_state;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
OpenGLState OpenGLState::cur_state;
bool OpenGLState::s_rgb_used;
namespace {
template <typename T>
bool UpdateValue(T& current_value, const T new_value) {
const bool changed = current_value != new_value;
current_value = new_value;
return changed;
}
template <typename T1, typename T2>
bool UpdateTie(T1 current_value, const T2 new_value) {
const bool changed = current_value != new_value;
current_value = new_value;
return changed;
}
void Enable(GLenum cap, bool enable) {
if (enable) {
glEnable(cap);
} else {
glDisable(cap);
}
}
void Enable(GLenum cap, GLuint index, bool enable) {
if (enable) {
glEnablei(cap, index);
} else {
glDisablei(cap, index);
}
}
void Enable(GLenum cap, bool& current_value, bool new_value) {
if (UpdateValue(current_value, new_value))
Enable(cap, new_value);
}
void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) {
if (UpdateValue(current_value, new_value))
Enable(cap, index, new_value);
}
} // namespace
OpenGLState::OpenGLState() {
// These all match default OpenGL values
geometry_shaders.enabled = false;
framebuffer_srgb.enabled = false;
multisample_control.alpha_to_coverage = false;
multisample_control.alpha_to_one = false;
cull.enabled = false;
cull.mode = GL_BACK;
cull.front_face = GL_CCW;
@@ -30,14 +76,15 @@ OpenGLState::OpenGLState() {
primitive_restart.enabled = false;
primitive_restart.index = 0;
for (auto& item : color_mask) {
item.red_enabled = GL_TRUE;
item.green_enabled = GL_TRUE;
item.blue_enabled = GL_TRUE;
item.alpha_enabled = GL_TRUE;
}
stencil.test_enabled = false;
auto reset_stencil = [](auto& config) {
const auto ResetStencil = [](auto& config) {
config.test_func = GL_ALWAYS;
config.test_ref = 0;
config.test_mask = 0xFFFFFFFF;
@@ -46,8 +93,10 @@ OpenGLState::OpenGLState() {
config.action_depth_pass = GL_KEEP;
config.action_stencil_fail = GL_KEEP;
};
reset_stencil(stencil.front);
reset_stencil(stencil.back);
stencil.test_enabled = false;
ResetStencil(stencil.front);
ResetStencil(stencil.back);
for (auto& item : viewports) {
item.x = 0;
item.y = 0;
@@ -61,6 +110,7 @@ OpenGLState::OpenGLState() {
item.scissor.width = 0;
item.scissor.height = 0;
}
for (auto& item : blend) {
item.enabled = true;
item.rgb_equation = GL_FUNC_ADD;
@@ -70,11 +120,14 @@ OpenGLState::OpenGLState() {
item.src_a_func = GL_ONE;
item.dst_a_func = GL_ZERO;
}
independant_blend.enabled = false;
blend_color.red = 0.0f;
blend_color.green = 0.0f;
blend_color.blue = 0.0f;
blend_color.alpha = 0.0f;
logic_op.enabled = false;
logic_op.operation = GL_COPY;
@@ -91,9 +144,12 @@ OpenGLState::OpenGLState() {
clip_distance = {};
point.size = 1;
fragment_color_clamp.enabled = false;
depth_clamp.far_plane = false;
depth_clamp.near_plane = false;
polygon_offset.fill_enable = false;
polygon_offset.line_enable = false;
polygon_offset.point_enable = false;
@@ -103,132 +159,380 @@ OpenGLState::OpenGLState() {
}
void OpenGLState::ApplyDefaultState() {
glEnable(GL_BLEND);
glDisable(GL_FRAMEBUFFER_SRGB);
glDisable(GL_CULL_FACE);
glDisable(GL_DEPTH_TEST);
glDisable(GL_PRIMITIVE_RESTART);
glDisable(GL_STENCIL_TEST);
glEnable(GL_BLEND);
glDisable(GL_COLOR_LOGIC_OP);
glDisable(GL_SCISSOR_TEST);
}
void OpenGLState::ApplyFramebufferState() const {
if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) {
glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
}
if (UpdateValue(cur_state.draw.draw_framebuffer, draw.draw_framebuffer)) {
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
}
}
void OpenGLState::ApplyVertexArrayState() const {
if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) {
glBindVertexArray(draw.vertex_array);
}
}
void OpenGLState::ApplyShaderProgram() const {
if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) {
glUseProgram(draw.shader_program);
}
}
void OpenGLState::ApplyProgramPipeline() const {
if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) {
glBindProgramPipeline(draw.program_pipeline);
}
}
void OpenGLState::ApplyClipDistances() const {
for (std::size_t i = 0; i < clip_distance.size(); ++i) {
Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i],
clip_distance[i]);
}
}
void OpenGLState::ApplyPointSize() const {
if (UpdateValue(cur_state.point.size, point.size)) {
glPointSize(point.size);
}
}
void OpenGLState::ApplyFragmentColorClamp() const {
if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) {
glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
}
}
void OpenGLState::ApplyMultisample() const {
Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage,
multisample_control.alpha_to_coverage);
Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one,
multisample_control.alpha_to_one);
}
void OpenGLState::ApplyDepthClamp() const {
if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
return;
}
cur_state.depth_clamp = depth_clamp;
UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
"Unimplemented Depth Clamp Separation!");
Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane);
}
void OpenGLState::ApplySRgb() const {
if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) {
if (framebuffer_srgb.enabled) {
// Track if sRGB is used
s_rgb_used = true;
glEnable(GL_FRAMEBUFFER_SRGB);
} else {
glDisable(GL_FRAMEBUFFER_SRGB);
}
if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled)
return;
cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled;
if (framebuffer_srgb.enabled) {
// Track if sRGB is used
s_rgb_used = true;
glEnable(GL_FRAMEBUFFER_SRGB);
} else {
glDisable(GL_FRAMEBUFFER_SRGB);
}
}
void OpenGLState::ApplyCulling() const {
if (cull.enabled != cur_state.cull.enabled) {
if (cull.enabled) {
glEnable(GL_CULL_FACE);
} else {
glDisable(GL_CULL_FACE);
}
}
Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled);
if (cull.mode != cur_state.cull.mode) {
if (UpdateValue(cur_state.cull.mode, cull.mode)) {
glCullFace(cull.mode);
}
if (cull.front_face != cur_state.cull.front_face) {
if (UpdateValue(cur_state.cull.front_face, cull.front_face)) {
glFrontFace(cull.front_face);
}
}
void OpenGLState::ApplyColorMask() const {
if (independant_blend.enabled) {
for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
const auto& updated = color_mask[i];
const auto& current = cur_state.color_mask[i];
if (updated.red_enabled != current.red_enabled ||
updated.green_enabled != current.green_enabled ||
updated.blue_enabled != current.blue_enabled ||
updated.alpha_enabled != current.alpha_enabled) {
glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
updated.blue_enabled, updated.alpha_enabled);
}
}
} else {
const auto& updated = color_mask[0];
const auto& current = cur_state.color_mask[0];
for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
const auto& updated = color_mask[i];
auto& current = cur_state.color_mask[i];
if (updated.red_enabled != current.red_enabled ||
updated.green_enabled != current.green_enabled ||
updated.blue_enabled != current.blue_enabled ||
updated.alpha_enabled != current.alpha_enabled) {
glColorMask(updated.red_enabled, updated.green_enabled, updated.blue_enabled,
updated.alpha_enabled);
current = updated;
glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
updated.blue_enabled, updated.alpha_enabled);
}
}
}
void OpenGLState::ApplyDepth() const {
if (depth.test_enabled != cur_state.depth.test_enabled) {
if (depth.test_enabled) {
glEnable(GL_DEPTH_TEST);
} else {
glDisable(GL_DEPTH_TEST);
}
}
Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled);
if (depth.test_func != cur_state.depth.test_func) {
if (cur_state.depth.test_func != depth.test_func) {
cur_state.depth.test_func = depth.test_func;
glDepthFunc(depth.test_func);
}
if (depth.write_mask != cur_state.depth.write_mask) {
if (cur_state.depth.write_mask != depth.write_mask) {
cur_state.depth.write_mask = depth.write_mask;
glDepthMask(depth.write_mask);
}
}
void OpenGLState::ApplyPrimitiveRestart() const {
if (primitive_restart.enabled != cur_state.primitive_restart.enabled) {
if (primitive_restart.enabled) {
glEnable(GL_PRIMITIVE_RESTART);
} else {
glDisable(GL_PRIMITIVE_RESTART);
}
}
Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled);
if (primitive_restart.index != cur_state.primitive_restart.index) {
if (cur_state.primitive_restart.index != primitive_restart.index) {
cur_state.primitive_restart.index = primitive_restart.index;
glPrimitiveRestartIndex(primitive_restart.index);
}
}
void OpenGLState::ApplyStencilTest() const {
if (stencil.test_enabled != cur_state.stencil.test_enabled) {
if (stencil.test_enabled) {
glEnable(GL_STENCIL_TEST);
} else {
glDisable(GL_STENCIL_TEST);
}
}
Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled);
const auto ConfigStencil = [](GLenum face, const auto& config, const auto& prev_config) {
if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref ||
config.test_mask != prev_config.test_mask) {
const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) {
if (current.test_func != config.test_func || current.test_ref != config.test_ref ||
current.test_mask != config.test_mask) {
current.test_func = config.test_func;
current.test_ref = config.test_ref;
current.test_mask = config.test_mask;
glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
}
if (config.action_depth_fail != prev_config.action_depth_fail ||
config.action_depth_pass != prev_config.action_depth_pass ||
config.action_stencil_fail != prev_config.action_stencil_fail) {
if (current.action_depth_fail != config.action_depth_fail ||
current.action_depth_pass != config.action_depth_pass ||
current.action_stencil_fail != config.action_stencil_fail) {
current.action_depth_fail = config.action_depth_fail;
current.action_depth_pass = config.action_depth_pass;
current.action_stencil_fail = config.action_stencil_fail;
glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
config.action_depth_pass);
}
if (config.write_mask != prev_config.write_mask) {
if (current.write_mask != config.write_mask) {
current.write_mask = config.write_mask;
glStencilMaskSeparate(face, config.write_mask);
}
};
ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front);
ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
}
// Viewport does not affects glClearBuffer so emulate viewport using scissor test
void OpenGLState::ApplyViewport() const {
for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) {
const auto& updated = viewports[i];
auto& current = cur_state.viewports[i];
if (current.x != updated.x || current.y != updated.y || current.width != updated.width ||
current.height != updated.height) {
current.x = updated.x;
current.y = updated.y;
current.width = updated.width;
current.height = updated.height;
glViewportIndexedf(i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
static_cast<GLfloat>(updated.width),
static_cast<GLfloat>(updated.height));
}
if (current.depth_range_near != updated.depth_range_near ||
current.depth_range_far != updated.depth_range_far) {
current.depth_range_near = updated.depth_range_near;
current.depth_range_far = updated.depth_range_far;
glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
}
Enable(GL_SCISSOR_TEST, i, current.scissor.enabled, updated.scissor.enabled);
if (current.scissor.x != updated.scissor.x || current.scissor.y != updated.scissor.y ||
current.scissor.width != updated.scissor.width ||
current.scissor.height != updated.scissor.height) {
current.scissor.x = updated.scissor.x;
current.scissor.y = updated.scissor.y;
current.scissor.width = updated.scissor.width;
current.scissor.height = updated.scissor.height;
glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
updated.scissor.height);
}
}
}
void OpenGLState::ApplyGlobalBlending() const {
const Blend& updated = blend[0];
Blend& current = cur_state.blend[0];
Enable(GL_BLEND, current.enabled, updated.enabled);
if (current.src_rgb_func != updated.src_rgb_func ||
current.dst_rgb_func != updated.dst_rgb_func || current.src_a_func != updated.src_a_func ||
current.dst_a_func != updated.dst_a_func) {
current.src_rgb_func = updated.src_rgb_func;
current.dst_rgb_func = updated.dst_rgb_func;
current.src_a_func = updated.src_a_func;
current.dst_a_func = updated.dst_a_func;
glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
updated.dst_a_func);
}
if (current.rgb_equation != updated.rgb_equation || current.a_equation != updated.a_equation) {
current.rgb_equation = updated.rgb_equation;
current.a_equation = updated.a_equation;
glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
}
}
void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
const Blend& updated = blend[target];
Blend& current = cur_state.blend[target];
if (current.enabled != updated.enabled || force) {
current.enabled = updated.enabled;
Enable(GL_BLEND, static_cast<GLuint>(target), updated.enabled);
}
if (UpdateTie(std::tie(current.src_rgb_func, current.dst_rgb_func, current.src_a_func,
current.dst_a_func),
std::tie(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
updated.dst_a_func))) {
glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
}
if (UpdateTie(std::tie(current.rgb_equation, current.a_equation),
std::tie(updated.rgb_equation, updated.a_equation))) {
glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
updated.a_equation);
}
}
void OpenGLState::ApplyBlending() const {
if (independant_blend.enabled) {
const bool force = independant_blend.enabled != cur_state.independant_blend.enabled;
for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) {
ApplyTargetBlending(target, force);
}
} else {
ApplyGlobalBlending();
}
cur_state.independant_blend.enabled = independant_blend.enabled;
if (UpdateTie(
std::tie(cur_state.blend_color.red, cur_state.blend_color.green,
cur_state.blend_color.blue, cur_state.blend_color.alpha),
std::tie(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha))) {
glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha);
}
}
void OpenGLState::ApplyLogicOp() const {
Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled);
if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) {
glLogicOp(logic_op.operation);
}
}
void OpenGLState::ApplyPolygonOffset() const {
Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable,
polygon_offset.fill_enable);
Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable,
polygon_offset.line_enable);
Enable(GL_POLYGON_OFFSET_POINT, cur_state.polygon_offset.point_enable,
polygon_offset.point_enable);
if (UpdateTie(std::tie(cur_state.polygon_offset.factor, cur_state.polygon_offset.units,
cur_state.polygon_offset.clamp),
std::tie(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp))) {
if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
} else {
UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0,
"Unimplemented Depth polygon offset clamp.");
glPolygonOffset(polygon_offset.factor, polygon_offset.units);
}
}
}
void OpenGLState::ApplyTextures() const {
bool has_delta{};
std::size_t first{};
std::size_t last{};
std::array<GLuint, Maxwell::NumTextureSamplers> textures;
for (std::size_t i = 0; i < std::size(texture_units); ++i) {
const auto& texture_unit = texture_units[i];
auto& cur_state_texture_unit = cur_state.texture_units[i];
textures[i] = texture_unit.texture;
if (cur_state_texture_unit.texture == textures[i])
continue;
cur_state_texture_unit.texture = textures[i];
if (!has_delta) {
first = i;
has_delta = true;
}
last = i;
}
if (has_delta) {
glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
textures.data() + first);
}
}
void OpenGLState::ApplySamplers() const {
bool has_delta{};
std::size_t first{};
std::size_t last{};
std::array<GLuint, Maxwell::NumTextureSamplers> samplers;
for (std::size_t i = 0; i < std::size(samplers); ++i) {
if (cur_state.texture_units[i].sampler == texture_units[i].sampler)
continue;
cur_state.texture_units[i].sampler = texture_units[i].sampler;
samplers[i] = texture_units[i].sampler;
if (!has_delta) {
first = i;
has_delta = true;
}
last = i;
}
if (has_delta) {
glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
samplers.data() + first);
}
}
void OpenGLState::Apply() const {
ApplyFramebufferState();
ApplyVertexArrayState();
ApplyShaderProgram();
ApplyProgramPipeline();
ApplyClipDistances();
ApplyPointSize();
ApplyFragmentColorClamp();
ApplyMultisample();
ApplyDepthClamp();
ApplyColorMask();
ApplyViewport();
ApplyStencilTest();
ApplySRgb();
ApplyCulling();
ApplyDepth();
ApplyPrimitiveRestart();
ApplyBlending();
ApplyLogicOp();
ApplyTextures();
ApplySamplers();
ApplyPolygonOffset();
}
void OpenGLState::EmulateViewportWithScissor() {
auto& current = viewports[0];
if (current.scissor.enabled) {
@@ -251,332 +555,6 @@ void OpenGLState::EmulateViewportWithScissor() {
}
}
void OpenGLState::ApplyViewport() const {
if (geometry_shaders.enabled) {
for (GLuint i = 0; i < static_cast<GLuint>(Tegra::Engines::Maxwell3D::Regs::NumViewports);
i++) {
const auto& current = cur_state.viewports[i];
const auto& updated = viewports[i];
if (updated.x != current.x || updated.y != current.y ||
updated.width != current.width || updated.height != current.height) {
glViewportIndexedf(
i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
static_cast<GLfloat>(updated.width), static_cast<GLfloat>(updated.height));
}
if (updated.depth_range_near != current.depth_range_near ||
updated.depth_range_far != current.depth_range_far) {
glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
}
if (updated.scissor.enabled != current.scissor.enabled) {
if (updated.scissor.enabled) {
glEnablei(GL_SCISSOR_TEST, i);
} else {
glDisablei(GL_SCISSOR_TEST, i);
}
}
if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
updated.scissor.width != current.scissor.width ||
updated.scissor.height != current.scissor.height) {
glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
updated.scissor.height);
}
}
} else {
const auto& current = cur_state.viewports[0];
const auto& updated = viewports[0];
if (updated.x != current.x || updated.y != current.y || updated.width != current.width ||
updated.height != current.height) {
glViewport(updated.x, updated.y, updated.width, updated.height);
}
if (updated.depth_range_near != current.depth_range_near ||
updated.depth_range_far != current.depth_range_far) {
glDepthRange(updated.depth_range_near, updated.depth_range_far);
}
if (updated.scissor.enabled != current.scissor.enabled) {
if (updated.scissor.enabled) {
glEnable(GL_SCISSOR_TEST);
} else {
glDisable(GL_SCISSOR_TEST);
}
}
if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
updated.scissor.width != current.scissor.width ||
updated.scissor.height != current.scissor.height) {
glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width,
updated.scissor.height);
}
}
}
void OpenGLState::ApplyGlobalBlending() const {
const Blend& current = cur_state.blend[0];
const Blend& updated = blend[0];
if (updated.enabled != current.enabled) {
if (updated.enabled) {
glEnable(GL_BLEND);
} else {
glDisable(GL_BLEND);
}
}
if (!updated.enabled) {
return;
}
if (updated.src_rgb_func != current.src_rgb_func ||
updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
updated.dst_a_func != current.dst_a_func) {
glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
updated.dst_a_func);
}
if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
}
}
void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
const Blend& updated = blend[target];
const Blend& current = cur_state.blend[target];
if (updated.enabled != current.enabled || force) {
if (updated.enabled) {
glEnablei(GL_BLEND, static_cast<GLuint>(target));
} else {
glDisablei(GL_BLEND, static_cast<GLuint>(target));
}
}
if (updated.src_rgb_func != current.src_rgb_func ||
updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
updated.dst_a_func != current.dst_a_func) {
glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
}
if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
updated.a_equation);
}
}
void OpenGLState::ApplyBlending() const {
if (independant_blend.enabled) {
for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
ApplyTargetBlending(i,
independant_blend.enabled != cur_state.independant_blend.enabled);
}
} else {
ApplyGlobalBlending();
}
if (blend_color.red != cur_state.blend_color.red ||
blend_color.green != cur_state.blend_color.green ||
blend_color.blue != cur_state.blend_color.blue ||
blend_color.alpha != cur_state.blend_color.alpha) {
glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha);
}
}
void OpenGLState::ApplyLogicOp() const {
if (logic_op.enabled != cur_state.logic_op.enabled) {
if (logic_op.enabled) {
glEnable(GL_COLOR_LOGIC_OP);
} else {
glDisable(GL_COLOR_LOGIC_OP);
}
}
if (logic_op.operation != cur_state.logic_op.operation) {
glLogicOp(logic_op.operation);
}
}
void OpenGLState::ApplyPolygonOffset() const {
const bool fill_enable_changed =
polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable;
const bool line_enable_changed =
polygon_offset.line_enable != cur_state.polygon_offset.line_enable;
const bool point_enable_changed =
polygon_offset.point_enable != cur_state.polygon_offset.point_enable;
const bool factor_changed = polygon_offset.factor != cur_state.polygon_offset.factor;
const bool units_changed = polygon_offset.units != cur_state.polygon_offset.units;
const bool clamp_changed = polygon_offset.clamp != cur_state.polygon_offset.clamp;
if (fill_enable_changed) {
if (polygon_offset.fill_enable) {
glEnable(GL_POLYGON_OFFSET_FILL);
} else {
glDisable(GL_POLYGON_OFFSET_FILL);
}
}
if (line_enable_changed) {
if (polygon_offset.line_enable) {
glEnable(GL_POLYGON_OFFSET_LINE);
} else {
glDisable(GL_POLYGON_OFFSET_LINE);
}
}
if (point_enable_changed) {
if (polygon_offset.point_enable) {
glEnable(GL_POLYGON_OFFSET_POINT);
} else {
glDisable(GL_POLYGON_OFFSET_POINT);
}
}
if (factor_changed || units_changed || clamp_changed) {
if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
} else {
glPolygonOffset(polygon_offset.factor, polygon_offset.units);
UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0,
"Unimplemented Depth polygon offset clamp.");
}
}
}
void OpenGLState::ApplyTextures() const {
bool has_delta{};
std::size_t first{};
std::size_t last{};
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures;
for (std::size_t i = 0; i < std::size(texture_units); ++i) {
const auto& texture_unit = texture_units[i];
const auto& cur_state_texture_unit = cur_state.texture_units[i];
textures[i] = texture_unit.texture;
if (textures[i] != cur_state_texture_unit.texture) {
if (!has_delta) {
first = i;
has_delta = true;
}
last = i;
}
}
if (has_delta) {
glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
textures.data() + first);
}
}
void OpenGLState::ApplySamplers() const {
bool has_delta{};
std::size_t first{};
std::size_t last{};
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers;
for (std::size_t i = 0; i < std::size(samplers); ++i) {
samplers[i] = texture_units[i].sampler;
if (samplers[i] != cur_state.texture_units[i].sampler) {
if (!has_delta) {
first = i;
has_delta = true;
}
last = i;
}
}
if (has_delta) {
glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
samplers.data() + first);
}
}
void OpenGLState::ApplyFramebufferState() const {
if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
}
if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) {
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
}
}
void OpenGLState::ApplyVertexArrayState() const {
if (draw.vertex_array != cur_state.draw.vertex_array) {
glBindVertexArray(draw.vertex_array);
}
}
void OpenGLState::ApplyDepthClamp() const {
if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
return;
}
UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
"Unimplemented Depth Clamp Separation!");
if (depth_clamp.far_plane || depth_clamp.near_plane) {
glEnable(GL_DEPTH_CLAMP);
} else {
glDisable(GL_DEPTH_CLAMP);
}
}
void OpenGLState::Apply() const {
ApplyFramebufferState();
ApplyVertexArrayState();
// Shader program
if (draw.shader_program != cur_state.draw.shader_program) {
glUseProgram(draw.shader_program);
}
// Program pipeline
if (draw.program_pipeline != cur_state.draw.program_pipeline) {
glBindProgramPipeline(draw.program_pipeline);
}
// Clip distance
for (std::size_t i = 0; i < clip_distance.size(); ++i) {
if (clip_distance[i] != cur_state.clip_distance[i]) {
if (clip_distance[i]) {
glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
} else {
glDisable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
}
}
}
// Point
if (point.size != cur_state.point.size) {
glPointSize(point.size);
}
if (fragment_color_clamp.enabled != cur_state.fragment_color_clamp.enabled) {
glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
}
if (multisample_control.alpha_to_coverage != cur_state.multisample_control.alpha_to_coverage) {
if (multisample_control.alpha_to_coverage) {
glEnable(GL_SAMPLE_ALPHA_TO_COVERAGE);
} else {
glDisable(GL_SAMPLE_ALPHA_TO_COVERAGE);
}
}
if (multisample_control.alpha_to_one != cur_state.multisample_control.alpha_to_one) {
if (multisample_control.alpha_to_one) {
glEnable(GL_SAMPLE_ALPHA_TO_ONE);
} else {
glDisable(GL_SAMPLE_ALPHA_TO_ONE);
}
}
ApplyDepthClamp();
ApplyColorMask();
ApplyViewport();
ApplyStencilTest();
ApplySRgb();
ApplyCulling();
ApplyDepth();
ApplyPrimitiveRestart();
ApplyBlending();
ApplyLogicOp();
ApplyTextures();
ApplySamplers();
ApplyPolygonOffset();
cur_state = *this;
}
OpenGLState& OpenGLState::UnbindTexture(GLuint handle) {
for (auto& unit : texture_units) {
if (unit.texture == handle) {

View File

@@ -53,10 +53,6 @@ public:
bool near_plane;
} depth_clamp; // GL_DEPTH_CLAMP
struct {
bool enabled; // viewports arrays are only supported when geometry shaders are enabled.
} geometry_shaders;
struct {
bool enabled; // GL_CULL_FACE
GLenum mode; // GL_CULL_FACE_MODE
@@ -184,34 +180,26 @@ public:
static OpenGLState GetCurState() {
return cur_state;
}
static bool GetsRGBUsed() {
return s_rgb_used;
}
static void ClearsRGBUsed() {
s_rgb_used = false;
}
/// Apply this state as the current OpenGL state
void Apply() const;
/// Apply only the state affecting the framebuffer
void ApplyFramebufferState() const;
/// Apply only the state affecting the vertex array
void ApplyVertexArrayState() const;
/// Set the initial OpenGL state
static void ApplyDefaultState();
/// Resets any references to the given resource
OpenGLState& UnbindTexture(GLuint handle);
OpenGLState& ResetSampler(GLuint handle);
OpenGLState& ResetProgram(GLuint handle);
OpenGLState& ResetPipeline(GLuint handle);
OpenGLState& ResetVertexArray(GLuint handle);
OpenGLState& ResetFramebuffer(GLuint handle);
void EmulateViewportWithScissor();
private:
static OpenGLState cur_state;
// Workaround for sRGB problems caused by
// QT not supporting srgb output
static bool s_rgb_used;
void ApplyFramebufferState() const;
void ApplyVertexArrayState() const;
void ApplyShaderProgram() const;
void ApplyProgramPipeline() const;
void ApplyClipDistances() const;
void ApplyPointSize() const;
void ApplyFragmentColorClamp() const;
void ApplyMultisample() const;
void ApplySRgb() const;
void ApplyCulling() const;
void ApplyColorMask() const;
@@ -227,6 +215,26 @@ private:
void ApplySamplers() const;
void ApplyDepthClamp() const;
void ApplyPolygonOffset() const;
/// Set the initial OpenGL state
static void ApplyDefaultState();
/// Resets any references to the given resource
OpenGLState& UnbindTexture(GLuint handle);
OpenGLState& ResetSampler(GLuint handle);
OpenGLState& ResetProgram(GLuint handle);
OpenGLState& ResetPipeline(GLuint handle);
OpenGLState& ResetVertexArray(GLuint handle);
OpenGLState& ResetFramebuffer(GLuint handle);
/// Viewport does not affects glClearBuffer so emulate viewport using scissor test
void EmulateViewportWithScissor();
private:
static OpenGLState cur_state;
// Workaround for sRGB problems caused by QT not supporting srgb output
static bool s_rgb_used;
};
} // namespace OpenGL

View File

@@ -5,7 +5,6 @@
#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <glad/glad.h>
#include "common/assert.h"

View File

@@ -10,6 +10,7 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "core/memory.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"

View File

@@ -7,7 +7,9 @@
#include <fmt/format.h>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
@@ -41,19 +43,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
switch (opcode->get().GetId()) {
case OpCode::Id::TEX: {
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
}
const TextureType texture_type{instr.tex.texture_type};
const bool is_array = instr.tex.array != 0;
const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
const auto process_mode = instr.tex.GetTextureProcessMode();
WriteTexInstructionFloat(
bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
bb, instr,
GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi));
break;
}
case OpCode::Id::TEXS: {
@@ -78,8 +79,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
}
case OpCode::Id::TLD4: {
ASSERT(instr.tld4.array == 0);
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
"NDV is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
@@ -92,8 +91,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const auto texture_type = instr.tld4.texture_type.Value();
const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
const bool is_array = instr.tld4.array != 0;
WriteTexInstructionFloat(bb, instr,
GetTld4Code(instr, texture_type, depth_compare, is_array));
const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
WriteTexInstructionFloat(
bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi));
break;
}
case OpCode::Id::TLD4S: {
@@ -127,7 +127,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
MetaTexture meta{sampler, {}, {}, {}, {}, component, element};
MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
@@ -152,7 +152,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
if (!instr.txq.IsComponentEnabled(element)) {
continue;
}
MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
const Node value =
Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
SetTemporal(bb, indexer++, value);
@@ -202,7 +202,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
for (u32 element = 0; element < 2; ++element) {
auto params = coords;
MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
SetTemporal(bb, element, value);
}
@@ -325,7 +325,8 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
TextureProcessMode process_mode, std::vector<Node> coords,
Node array, Node depth_compare, u32 bias_offset) {
Node array, Node depth_compare, u32 bias_offset,
std::vector<Node> aoffi) {
const bool is_array = array;
const bool is_shadow = depth_compare;
@@ -374,7 +375,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto copy_coords = coords;
MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element};
MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element};
values[element] = Operation(read_method, meta, std::move(copy_coords));
}
@@ -382,9 +383,15 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
}
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
TextureProcessMode process_mode, bool depth_compare, bool is_array) {
const bool lod_bias_enabled =
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
TextureProcessMode process_mode, bool depth_compare, bool is_array,
bool is_aoffi) {
const bool lod_bias_enabled{
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};
u64 parameter_register = instr.gpr20.Value();
if (lod_bias_enabled) {
++parameter_register;
}
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
@@ -404,15 +411,19 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
const Node array = is_array ? GetRegister(array_register) : nullptr;
std::vector<Node> aoffi;
if (is_aoffi) {
aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
}
Node dc{};
if (depth_compare) {
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
// or bias are used
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
dc = GetRegister(depth_register);
dc = GetRegister(parameter_register++);
}
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi);
}
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
@@ -448,11 +459,11 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
dc = GetRegister(depth_register);
}
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {});
}
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
bool is_array) {
bool is_array, bool is_aoffi) {
const std::size_t coord_count = GetCoordCount(texture_type);
const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
@@ -463,15 +474,27 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
const u64 coord_register = array_register + (is_array ? 1 : 0);
std::vector<Node> coords;
for (size_t i = 0; i < coord_count; ++i)
for (std::size_t i = 0; i < coord_count; ++i) {
coords.push_back(GetRegister(coord_register + i));
}
u64 parameter_register = instr.gpr20.Value();
std::vector<Node> aoffi;
if (is_aoffi) {
aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
}
Node dc{};
if (depth_compare) {
dc = GetRegister(parameter_register++);
}
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element};
MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
@@ -507,7 +530,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
MetaTexture meta{sampler, array, {}, {}, lod, {}, element};
MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
return values;
@@ -531,4 +554,45 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
return {coord_count, total_coord_count};
}
} // namespace VideoCommon::Shader
std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
bool is_tld4) {
const auto [coord_offsets, size, wrap_value,
diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> {
if (is_tld4) {
return {{0, 8, 16}, 6, 32, 64};
} else {
return {{0, 4, 8}, 4, 8, 16};
}
}();
const u32 mask = (1U << size) - 1;
std::vector<Node> aoffi;
aoffi.reserve(coord_count);
const auto aoffi_immediate{
TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))};
if (!aoffi_immediate) {
// Variable access, not supported on AMD.
LOG_WARNING(HW_GPU,
"AOFFI constant folding failed, some hardware might have graphical issues");
for (std::size_t coord = 0; coord < coord_count; ++coord) {
const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size);
const Node condition =
Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
aoffi.push_back(Operation(OperationCode::Select, condition, negative, value));
}
return aoffi;
}
for (std::size_t coord = 0; coord < coord_count; ++coord) {
s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask;
if (value >= wrap_value) {
value -= diff_value;
}
aoffi.push_back(Immediate(value));
}
return aoffi;
}
} // namespace VideoCommon::Shader

View File

@@ -7,6 +7,7 @@
#include <array>
#include <cstring>
#include <map>
#include <optional>
#include <set>
#include <string>
#include <tuple>
@@ -290,6 +291,7 @@ struct MetaTexture {
const Sampler& sampler;
Node array{};
Node depth_compare{};
std::vector<Node> aoffi;
Node bias{};
Node lod{};
Node component{};
@@ -741,14 +743,14 @@ private:
Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
bool is_array);
bool is_array, bool is_aoffi);
Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
bool is_array);
Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
bool depth_compare, bool is_array);
bool depth_compare, bool is_array, bool is_aoffi);
Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
bool is_array);
@@ -757,9 +759,11 @@ private:
Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
Node array, Node depth_compare, u32 bias_offset);
Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi);
Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
u64 byte_height);
@@ -773,6 +777,8 @@ private:
Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);
std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor);
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
template <typename... T>

View File

@@ -6,6 +6,7 @@
#include <utility>
#include <variant>
#include "common/common_types.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -14,7 +15,7 @@ namespace {
std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
OperationCode operation_code) {
for (; cursor >= 0; --cursor) {
const Node node = code[cursor];
const Node node = code.at(cursor);
if (const auto operation = std::get_if<OperationNode>(node)) {
if (operation->GetCode() == operation_code)
return {node, cursor};
@@ -64,6 +65,20 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
return nullptr;
}
std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) {
// Reduce the cursor in one to avoid infinite loops when the instruction sets the same register
// that it uses as operand
const auto [found, found_cursor] =
TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1);
if (!found) {
return {};
}
if (const auto immediate = std::get_if<ImmediateNode>(found)) {
return immediate->GetValue();
}
return {};
}
std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
s64 cursor) {
for (; cursor >= 0; --cursor) {

View File

@@ -373,6 +373,7 @@ void Config::ReadValues() {
ReadSetting("use_accurate_gpu_emulation", false).toBool();
Settings::values.use_asynchronous_gpu_emulation =
ReadSetting("use_asynchronous_gpu_emulation", false).toBool();
Settings::values.force_30fps_mode = ReadSetting("force_30fps_mode", false).toBool();
Settings::values.bg_red = ReadSetting("bg_red", 0.0).toFloat();
Settings::values.bg_green = ReadSetting("bg_green", 0.0).toFloat();
@@ -648,6 +649,7 @@ void Config::SaveValues() {
WriteSetting("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation, false);
WriteSetting("use_asynchronous_gpu_emulation", Settings::values.use_asynchronous_gpu_emulation,
false);
WriteSetting("force_30fps_mode", Settings::values.force_30fps_mode, false);
// Cast to double because Qt's written float values are not human-readable
WriteSetting("bg_red", (double)Settings::values.bg_red, 0.0);

View File

@@ -77,6 +77,8 @@ void ConfigureGraphics::setConfiguration() {
ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn());
ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation);
ui->force_30fps_mode->setEnabled(!Core::System::GetInstance().IsPoweredOn());
ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode);
UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
Settings::values.bg_blue));
}
@@ -90,6 +92,7 @@ void ConfigureGraphics::applyConfiguration() {
Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
Settings::values.use_asynchronous_gpu_emulation =
ui->use_asynchronous_gpu_emulation->isChecked();
Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
Settings::values.bg_red = static_cast<float>(bg_color.redF());
Settings::values.bg_green = static_cast<float>(bg_color.greenF());
Settings::values.bg_blue = static_cast<float>(bg_color.blueF());

View File

@@ -70,6 +70,13 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="force_30fps_mode">
<property name="text">
<string>Force 30 FPS mode</string>
</property>
</widget>
</item>
<item>
<layout class="QHBoxLayout" name="horizontalLayout">
<item>

Some files were not shown because too many files have changed in this diff Show More