Compare commits

..

82 Commits

Author SHA1 Message Date
Andrea Pappacoda
9f08b3c3c9 fix(dist): wrap screenshots in <image> tags
The [appstream] spec says that <screenshot/> tags must be wrapped in
either <image/> or <video/> tags, so this patch does just that.

[appstream]: https://freedesktop.org/software/appstream/docs/chap-Metadata.html#tag-screenshots
2023-01-19 10:13:30 +01:00
Narr the Reg
67c3c65f7b Merge pull request #9638 from Kelebek1/firmware4
Demote maxwell3d Firmware4 call log to debug
2023-01-18 23:15:22 -06:00
bunnei
413df0811d Merge pull request #9619 from liamwhite/timing-spaghetti
timing: wait for completion on unregister
2023-01-18 15:13:38 -08:00
bunnei
82e2ac6026 Merge pull request #9615 from merryhime/upsample-ob1
audio_core: Corrective fixes to upsampler
2023-01-17 23:34:12 -08:00
bunnei
1551f97950 Merge pull request #9608 from liamwhite/fps
nvnflinger: correct swap interval handling
2023-01-17 23:13:47 -08:00
Kelebek1
5a106cf11e Demote maxwell3d Firmware4 call log to debug 2023-01-18 01:59:11 +00:00
bunnei
28fde29924 Merge pull request #9612 from goldenx86/1.5xScaler
1.5X, 7X, 8X resolution scaler options
2023-01-16 17:49:06 -08:00
bunnei
58a10ed5d0 Merge pull request #9622 from merryhime/dynarmic
externals: Update dynarmic to 6.4.4
2023-01-15 23:22:54 -08:00
Merry
2817af0961 externals: Update dynarmic to 6.4.4 2023-01-15 02:31:00 +00:00
Liam
0953cdd271 timing: wait for completion on unregister 2023-01-14 15:48:01 -05:00
Merry
a0e8e5b22e upsample: Fix coefficient format 2023-01-14 17:09:03 +00:00
Merry
122a8faa38 audio_core: Fix off-by-one error in upsampler 2023-01-14 15:19:11 +00:00
Matías Locatti
d7dbd8e774 Update settings.h 2023-01-13 04:56:25 -03:00
Matías Locatti
4294429a07 CPP 2023-01-13 04:55:26 -03:00
Matías Locatti
bb2aec00a2 UI change 2023-01-13 04:54:29 -03:00
Matías Locatti
3ba53f2511 1.5X resolution scaler option 2023-01-13 02:34:24 -03:00
Liam
2f2ef5b147 nvnflinger: correct swap interval handling 2023-01-11 22:05:08 -05:00
bunnei
0e8f98a441 Merge pull request #9605 from german77/mouse_mapping
yuzu: Read mouse scroll
2023-01-10 17:32:58 -08:00
Narr the Reg
87b02f78e1 yuzu: Read mouse wheel input 2023-01-10 18:20:58 -06:00
Morph
c277dad25f Merge pull request #9596 from liamwhite/mvk
MoltenVK: restrict number of vertex attributes/bindings to 16
2023-01-10 18:11:44 -05:00
liamwhite
cbcf210c19 Merge pull request #9582 from yuzu-emu/revert-9518-revert-9504-pg2
Revert "Revert "k_page_group: synchronize""
2023-01-10 15:27:33 -05:00
liamwhite
0eae0b6803 Merge pull request #9601 from liamwhite/it-never-ends
qt: unlock during signal emission
2023-01-10 15:27:12 -05:00
liamwhite
74404261d2 Merge pull request #9598 from liamwhite/indirect
vulkan_common: fix indirect draw with count
2023-01-10 15:27:02 -05:00
liamwhite
e2c68edd35 Merge pull request #9595 from liamwhite/per-game
qt: fix configuration weirdness on turbo
2023-01-10 15:26:49 -05:00
liamwhite
9f974ea818 Merge pull request #9565 from MonsterDruide1/tas-multiplayer-lengths
TAS: Show all script lengths for multiplayer
2023-01-10 15:26:36 -05:00
Liam
385ddef8c3 qt: unlock during signal emission 2023-01-10 12:46:01 -05:00
Liam
fa8581e900 vulkan_common: fix indirect draw with count 2023-01-10 09:43:36 -05:00
TellowKrinkle
eaf425bd32 MoltenVK: restrict number of vertex attributes/bindings to 16 2023-01-09 19:01:09 -05:00
Liam
1caa84e652 qt: fix configuration weirdness on turbo 2023-01-09 17:54:49 -05:00
UltraHDR
fda0e7e989 macOS: Make Yuzu show up in the Launchpad Games folder (#9594)
https://developer.apple.com/documentation/bundleresources/information_property_list/lsapplicationcategorytype
This makes it show up in the Launchpad Games folder
2023-01-09 16:43:45 -05:00
Morph
6bf1436f5c Merge pull request #9589 from liamwhite/default
renderer_vulkan: disable turbo by default
2023-01-09 16:23:24 -05:00
Morph
ffb12f4f9f Merge pull request #9581 from liamwhite/turbo2
renderer_vulkan: pause turbo submissions on inactive queue
2023-01-09 16:23:16 -05:00
Morph
a4cd747122 Merge pull request #9530 from liamwhite/vk-feature-init
vulkan_device: refactor feature testing
2023-01-09 16:23:04 -05:00
Morph
3be8312120 Merge pull request #9569 from liamwhite/shutdown-wars
qt: additional fixes for reentrant shutdown
2023-01-09 16:21:27 -05:00
Liam
279005448a vulkan_device: refactor feature testing 2023-01-09 16:12:16 -05:00
Fernando S
948ac976d1 Merge pull request #9583 from FernandoS27/fuck-ogl-already
VideoCore: Fix OGL cache invalidation.
2023-01-08 14:05:29 -05:00
Liam
0df43e4e07 renderer_vulkan: disable turbo by default 2023-01-08 11:38:30 -05:00
Fernando Sahmkow
efbb6fe288 VideoCore: Fix OGL cache invalidation. 2023-01-07 21:56:17 -05:00
bunnei
a5693afa03 Revert "Revert "k_page_group: synchronize"" 2023-01-07 15:32:10 -08:00
bunnei
66e4a48b75 Merge pull request #9563 from german77/crash_not_allowed
input_common: Create an update engine
2023-01-07 13:41:27 -08:00
MonsterDruide1
115bf20448 TAS: Show all script lengths for multiplayer 2023-01-07 21:15:37 +01:00
liamwhite
926438b68d Merge pull request #9578 from bylaws/patch-2
Avoid OOB array access reading passthrough attr mask
2023-01-07 15:14:08 -05:00
Liam
c19c8ac92c renderer_vulkan: pause turbo submissions on inactive queue 2023-01-07 14:35:11 -05:00
Billy Laws
d34275a260 Avoid OOB array access reading passthrough attr mask
YFC 1.5 extended the size of the varying mask used to hold passthrough attrs without considering this
2023-01-07 18:38:21 +00:00
Narr the Reg
432d48d9c8 Merge pull request #9570 from liamwhite/less-clock-boost
renderer_vulkan: disable clock boost on unvalidated devices
2023-01-07 10:41:37 -06:00
Narr the Reg
cc92b7fd94 Merge pull request #9573 from liamwhite/optional
vulkan_device: avoid attempt to access empty optional
2023-01-07 10:40:21 -06:00
Liam
444b25bae1 vulkan_device: avoid attempt to access empty optional 2023-01-06 21:23:21 -05:00
Liam
2e4dde12c7 renderer_vulkan: disable clock boost on unvalidated devices 2023-01-06 19:07:47 -05:00
Liam
9ed4c13758 qt: additional fixes for reentrant shutdown 2023-01-06 18:50:01 -05:00
liamwhite
c0f17e1b27 Merge pull request #9567 from german77/antialias
opengl: Sanitize antialiasing config
2023-01-06 15:20:23 -05:00
Narr the Reg
4bda2b475f opengl: Sanitize antialiasing config 2023-01-06 13:42:20 -06:00
Fernando S
7ef897a277 Merge pull request #9566 from Wollnashorn/vulkan-cache-header-fix
video_core/vulkan: Fixed loading of Vulkan driver pipeline cache
2023-01-06 11:58:36 -05:00
Wollnashorn
457826a83b video_core/vulkan: Fixed loading of Vulkan driver pipeline cache
The header size of the Vulkan driver pipeline cache files was incorrectly in PipelineCache::LoadVulkanPipelineCache, for which the pipeline cache wasn't read correctly and got invalidated on each load.
2023-01-06 16:52:41 +01:00
Fernando S
8b251fc3f6 Merge pull request #9535 from bylaws/master
Port over several shader-compiler fixes from skyline
2023-01-06 10:06:45 -05:00
liamwhite
3c05988df2 Merge pull request #9561 from liamwhite/update-dynarmic
externals: update dynarmic, xbyak
2023-01-06 10:00:18 -05:00
liamwhite
6d74490139 Merge pull request #9558 from MonsterDruide1/network-timeout-noerror
net: Silently translate ETIMEDOUT network error
2023-01-06 10:00:09 -05:00
liamwhite
020dbcdbc7 Merge pull request #9552 from liamwhite/turbo
vulkan: implement 'turbo mode' clock booster
2023-01-06 09:59:59 -05:00
Fernando S
5bcbb8de45 Merge pull request #9559 from FernandoS27/cached-writes
VideoCore: Implement Cached Writes, use fastmem for reading GPU memory and eliminate old stuffs
2023-01-06 07:31:39 -05:00
liamwhite
990fe2b3fc Merge pull request #9564 from FernandoS27/oops-i-did-it-again
MacroHLE: eliminate 2 rushed macros.
2023-01-05 22:14:27 -05:00
Fernando Sahmkow
f6245dc40a MacroHLE: eliminate 2 rushed macros. 2023-01-05 20:53:31 -05:00
Narr the Reg
8042ce7e19 input_common: Create an update engine 2023-01-05 19:24:29 -06:00
liamwhite
eaca61e073 Merge pull request #9528 from liamwhite/mvk-nulldesc
renderer_vulkan: implement fallback path for null buffer descriptors
2023-01-05 18:31:55 -05:00
liamwhite
3e33a878dc Merge pull request #9536 from liamwhite/debug-utils
vulkan_common: unify VK_EXT_debug_utils and selection of validation layer
2023-01-05 18:31:45 -05:00
Billy Laws
58fec43768 Run clang-format 2023-01-05 22:18:10 +00:00
Billy Laws
12b4c9c04c externals: Update sirit 2023-01-05 22:13:07 +00:00
Billy Laws
68ed60cee4 shader_recompiler: Fix shuffle partitioning for >64 invoc-per-subgroup GPUs
The existing implementation only supports 64 invoc-per-subgroup GPUs, and misbehaves on adreno when invocations need to be split into 4 emulated subgroups.
2023-01-05 22:13:07 +00:00
Billy Laws
6c812a0c84 Vulkan, OpenGL: Hook up geometry shader passthrough emulation 2023-01-05 22:13:07 +00:00
Billy Laws
625a4af73a shader_recompiler: Add support for lowering geometry passthrough
Reuses most of the existing code for generating the gl_Layer passthrough. Fixes geometry in Nier: Automata on GPUs without HW passthrough support.
2023-01-05 22:13:07 +00:00
Billy Laws
9e2997c4b6 Vulkan, OpenGL: Hook up storage buffer alignment code 2023-01-05 22:13:07 +00:00
Billy Laws
8804a4eb23 shader_recompiler: Align SSBO offsets to meet host requirements
We can take advantage of SSBO addresses being passed in a constant bufer to account for the extra alignment requirements in the shader itself.
2023-01-05 22:13:07 +00:00
Billy Laws
3f0985c7b0 shader_recompiler: SPIRV: Only enable int64 feature when supported 2023-01-05 22:13:07 +00:00
Billy Laws
c1cc99584c shader_recompiler: Add comparison operators to descriptor types 2023-01-05 22:13:07 +00:00
Billy Laws
bbfad79c89 Vulkan: Add a workaround for input_position on Adreno drivers
Adreno drivers will crash compiling geometry shaders if the input position is not wrapped in a gl_in struct.
2023-01-05 22:13:07 +00:00
Fernando Sahmkow
b56ad93bbc BufferBase: Don't ignore GPU pages. 2023-01-05 14:00:10 -05:00
Fernando Sahmkow
2d0c4f2b1d Fermi2D: sync cache flushes 2023-01-05 06:43:28 -05:00
Fernando Sahmkow
af5ecb0b15 MemoryManager: use fastmem directly. 2023-01-05 06:06:33 -05:00
MonsterDruide1
688a9fbfa6 net: Silently translate ETIMEDOUT network error 2023-01-05 11:54:36 +01:00
Fernando Sahmkow
6c7eb81f7d video_core: Cache GPU internal writes. 2023-01-05 05:23:39 -05:00
Liam
a4269c285a common: add setting for renderer clock workaround 2023-01-04 22:22:01 -05:00
Liam
301e9bbc03 vulkan: implement 'turbo mode' clock booster 2023-01-04 22:22:01 -05:00
Liam
66ae79de13 renderer_vulkan: implement fallback path for null descriptors 2023-01-04 22:14:01 -05:00
Liam
aa13ee5c4a vulkan_common: unify VK_EXT_debug_utils and selection of validation layer 2023-01-01 11:59:47 -05:00
91 changed files with 2094 additions and 1712 deletions

View File

@@ -53,10 +53,10 @@ SPDX-License-Identifier: CC0-1.0
<developer_name>yuzu Emulator Team</developer_name>
<content_rating type="oars-1.0"/>
<screenshots>
<screenshot type="default">https://raw.githubusercontent.com/yuzu-emu/yuzu-emu.github.io/master/images/screenshots/001-Super%20Mario%20Odyssey%20.png</screenshot>
<screenshot>https://raw.githubusercontent.com/yuzu-emu/yuzu-emu.github.io/master/images/screenshots/004-The%20Legend%20of%20Zelda%20Skyward%20Sword%20HD.png</screenshot>
<screenshot>https://raw.githubusercontent.com/yuzu-emu/yuzu-emu.github.io/master/images/screenshots/007-Pokemon%20Sword.png</screenshot>
<screenshot>https://raw.githubusercontent.com/yuzu-emu/yuzu-emu.github.io/master/images/screenshots/010-Hyrule%20Warriors%20Age%20of%20Calamity.png</screenshot>
<screenshot>https://raw.githubusercontent.com/yuzu-emu/yuzu-emu.github.io/master/images/screenshots/039-Pok%C3%A9mon%20Mystery%20Dungeon%20Rescue%20Team%20DX.png.png.png</screenshot>
<screenshot type="default"><image>https://raw.githubusercontent.com/yuzu-emu/yuzu-emu.github.io/master/images/screenshots/001-Super%20Mario%20Odyssey%20.png</image></screenshot>
<screenshot><image>https://raw.githubusercontent.com/yuzu-emu/yuzu-emu.github.io/master/images/screenshots/004-The%20Legend%20of%20Zelda%20Skyward%20Sword%20HD.png</image></screenshot>
<screenshot><image>https://raw.githubusercontent.com/yuzu-emu/yuzu-emu.github.io/master/images/screenshots/007-Pokemon%20Sword.png</image></screenshot>
<screenshot><image>https://raw.githubusercontent.com/yuzu-emu/yuzu-emu.github.io/master/images/screenshots/010-Hyrule%20Warriors%20Age%20of%20Calamity.png</image></screenshot>
<screenshot><image>https://raw.githubusercontent.com/yuzu-emu/yuzu-emu.github.io/master/images/screenshots/039-Pok%C3%A9mon%20Mystery%20Dungeon%20Rescue%20Team%20DX.png.png.png</image></screenshot>
</screenshots>
</component>

View File

@@ -20,25 +20,25 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input,
const u32 target_sample_count, const u32 source_sample_count,
UpsamplerState* state) {
constexpr u32 WindowSize = 10;
constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow1{
51.93359375f, -18.80078125f, 9.73046875f, -5.33203125f, 2.84375f,
-1.41015625f, 0.62109375f, -0.2265625f, 0.0625f, -0.00390625f,
constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc1{
0.95376587f, -0.12872314f, 0.060028076f, -0.032470703f, 0.017669678f,
-0.009124756f, 0.004272461f, -0.001739502f, 0.000579834f, -0.000091552734f,
};
constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow2{
105.35546875f, -24.52734375f, 11.9609375f, -6.515625f, 3.52734375f,
-1.796875f, 0.828125f, -0.32421875f, 0.1015625f, -0.015625f,
constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc2{
0.8230896f, -0.19161987f, 0.093444824f, -0.05090332f, 0.027557373f,
-0.014038086f, 0.0064697266f, -0.002532959f, 0.00079345703f, -0.00012207031f,
};
constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow3{
122.08203125f, -16.47656250f, 7.68359375f, -4.15625000f, 2.26171875f,
-1.16796875f, 0.54687500f, -0.22265625f, 0.07421875f, -0.01171875f,
constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc3{
0.6298828f, -0.19274902f, 0.09725952f, -0.05319214f, 0.028625488f,
-0.014373779f, 0.006500244f, -0.0024719238f, 0.0007324219f, -0.000091552734f,
};
constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow4{
23.73437500f, -9.62109375f, 5.07812500f, -2.78125000f, 1.46875000f,
-0.71484375f, 0.30859375f, -0.10546875f, 0.02734375f, 0.00000000f,
constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc4{
0.4057312f, -0.1468811f, 0.07601929f, -0.041656494f, 0.022216797f,
-0.011016846f, 0.004852295f, -0.0017700195f, 0.00048828125f, -0.000030517578f,
};
constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow5{
80.62500000f, -24.67187500f, 12.44921875f, -6.80859375f, 3.66406250f,
-1.83984375f, 0.83203125f, -0.31640625f, 0.09375000f, -0.01171875f,
constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc5{
0.1854248f, -0.075164795f, 0.03967285f, -0.021728516f, 0.011474609f,
-0.005584717f, 0.0024108887f, -0.0008239746f, 0.00021362305f, 0.0f,
};
if (!state->initialized) {
@@ -91,52 +91,31 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input,
static_cast<u16>((state->history_output_index + 1) % UpsamplerState::HistorySize);
};
auto calculate_sample = [&state](std::span<const Common::FixedPoint<24, 8>> coeffs1,
std::span<const Common::FixedPoint<24, 8>> coeffs2) -> s32 {
auto calculate_sample = [&state](std::span<const Common::FixedPoint<17, 15>> coeffs1,
std::span<const Common::FixedPoint<17, 15>> coeffs2) -> s32 {
auto output_index{state->history_output_index};
auto start_pos{output_index - state->history_start_index + 1U};
auto end_pos{10U};
u64 result{0};
if (start_pos < 10) {
end_pos = start_pos;
}
for (u32 coeff_index = 0; coeff_index < 10; coeff_index++) {
result += static_cast<u64>(state->history[output_index].to_raw()) *
coeffs1[coeff_index].to_raw();
u64 prev_contrib{0};
u32 coeff_index{0};
for (; coeff_index < end_pos; coeff_index++, output_index--) {
prev_contrib += static_cast<u64>(state->history[output_index].to_raw()) *
coeffs1[coeff_index].to_raw();
}
auto end_index{state->history_end_index};
for (; start_pos < 9; start_pos++, coeff_index++, end_index--) {
prev_contrib += static_cast<u64>(state->history[end_index].to_raw()) *
coeffs1[coeff_index].to_raw();
output_index = output_index == state->history_start_index ? state->history_end_index
: output_index - 1;
}
output_index =
static_cast<u16>((state->history_output_index + 1) % UpsamplerState::HistorySize);
start_pos = state->history_end_index - output_index + 1U;
end_pos = 10U;
if (start_pos < 10) {
end_pos = start_pos;
for (u32 coeff_index = 0; coeff_index < 10; coeff_index++) {
result += static_cast<u64>(state->history[output_index].to_raw()) *
coeffs2[coeff_index].to_raw();
output_index = output_index == state->history_end_index ? state->history_start_index
: output_index + 1;
}
u64 next_contrib{0};
coeff_index = 0;
for (; coeff_index < end_pos; coeff_index++, output_index++) {
next_contrib += static_cast<u64>(state->history[output_index].to_raw()) *
coeffs2[coeff_index].to_raw();
}
auto start_index{state->history_start_index};
for (; start_pos < 9; start_pos++, start_index++, coeff_index++) {
next_contrib += static_cast<u64>(state->history[start_index].to_raw()) *
coeffs2[coeff_index].to_raw();
}
return static_cast<s32>(((prev_contrib >> 15) + (next_contrib >> 15)) >> 8);
return static_cast<s32>(result >> (8 + 15));
};
switch (state->ratio.to_int_floor()) {
@@ -150,23 +129,23 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input,
break;
case 1:
output[write_index] = calculate_sample(SincWindow3, SincWindow4);
output[write_index] = calculate_sample(WindowedSinc1, WindowedSinc5);
break;
case 2:
output[write_index] = calculate_sample(SincWindow2, SincWindow1);
output[write_index] = calculate_sample(WindowedSinc2, WindowedSinc4);
break;
case 3:
output[write_index] = calculate_sample(SincWindow5, SincWindow5);
output[write_index] = calculate_sample(WindowedSinc3, WindowedSinc3);
break;
case 4:
output[write_index] = calculate_sample(SincWindow1, SincWindow2);
output[write_index] = calculate_sample(WindowedSinc4, WindowedSinc2);
break;
case 5:
output[write_index] = calculate_sample(SincWindow4, SincWindow3);
output[write_index] = calculate_sample(WindowedSinc5, WindowedSinc1);
break;
}
state->sample_index = static_cast<u8>((state->sample_index + 1) % 6);
@@ -183,11 +162,11 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input,
break;
case 1:
output[write_index] = calculate_sample(SincWindow2, SincWindow1);
output[write_index] = calculate_sample(WindowedSinc2, WindowedSinc4);
break;
case 2:
output[write_index] = calculate_sample(SincWindow1, SincWindow2);
output[write_index] = calculate_sample(WindowedSinc4, WindowedSinc2);
break;
}
state->sample_index = static_cast<u8>((state->sample_index + 1) % 3);
@@ -204,12 +183,12 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input,
break;
case 1:
output[write_index] = calculate_sample(SincWindow1, SincWindow2);
output[write_index] = calculate_sample(WindowedSinc4, WindowedSinc2);
break;
case 2:
increment();
output[write_index] = calculate_sample(SincWindow2, SincWindow1);
output[write_index] = calculate_sample(WindowedSinc2, WindowedSinc4);
break;
}
state->sample_index = static_cast<u8>((state->sample_index + 1) % 3);

View File

@@ -292,9 +292,6 @@ class InputDevice {
public:
virtual ~InputDevice() = default;
// Request input device to update if necessary
virtual void SoftUpdate() {}
// Force input device to update data regardless of the current state
virtual void ForceUpdate() {}

View File

@@ -129,6 +129,10 @@ void UpdateRescalingInfo() {
info.up_scale = 1;
info.down_shift = 0;
break;
case ResolutionSetup::Res3_2X:
info.up_scale = 3;
info.down_shift = 1;
break;
case ResolutionSetup::Res2X:
info.up_scale = 2;
info.down_shift = 0;
@@ -149,6 +153,14 @@ void UpdateRescalingInfo() {
info.up_scale = 6;
info.down_shift = 0;
break;
case ResolutionSetup::Res7X:
info.up_scale = 7;
info.down_shift = 0;
break;
case ResolutionSetup::Res8X:
info.up_scale = 8;
info.down_shift = 0;
break;
default:
ASSERT(false);
info.up_scale = 1;
@@ -185,6 +197,7 @@ void RestoreGlobalState(bool is_powered_on) {
// Renderer
values.fsr_sharpening_slider.SetGlobal(true);
values.renderer_backend.SetGlobal(true);
values.renderer_force_max_clock.SetGlobal(true);
values.vulkan_device.SetGlobal(true);
values.aspect_ratio.SetGlobal(true);
values.max_anisotropy.SetGlobal(true);

View File

@@ -56,11 +56,14 @@ enum class ResolutionSetup : u32 {
Res1_2X = 0,
Res3_4X = 1,
Res1X = 2,
Res2X = 3,
Res3X = 4,
Res4X = 5,
Res5X = 6,
Res6X = 7,
Res3_2X = 3,
Res2X = 4,
Res3X = 5,
Res4X = 6,
Res5X = 7,
Res6X = 8,
Res7X = 9,
Res8X = 10,
};
enum class ScalingFilter : u32 {
@@ -415,6 +418,7 @@ struct Values {
// Renderer
SwitchableSetting<RendererBackend, true> renderer_backend{
RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"};
SwitchableSetting<bool> renderer_force_max_clock{false, "force_max_clock"};
Setting<bool> renderer_debug{false, "debug"};
Setting<bool> renderer_shader_feedback{false, "shader_feedback"};
Setting<bool> enable_nsight_aftermath{false, "nsight_aftermath"};

View File

@@ -226,6 +226,7 @@ add_library(core STATIC
hle/kernel/k_page_buffer.h
hle/kernel/k_page_heap.cpp
hle/kernel/k_page_heap.h
hle/kernel/k_page_group.cpp
hle/kernel/k_page_group.h
hle/kernel/k_page_table.cpp
hle/kernel/k_page_table.h

View File

@@ -142,16 +142,24 @@ void CoreTiming::ScheduleLoopingEvent(std::chrono::nanoseconds start_time,
}
void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type,
std::uintptr_t user_data) {
std::scoped_lock scope{basic_lock};
const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
return e.type.lock().get() == event_type.get() && e.user_data == user_data;
});
std::uintptr_t user_data, bool wait) {
{
std::scoped_lock lk{basic_lock};
const auto itr =
std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
return e.type.lock().get() == event_type.get() && e.user_data == user_data;
});
// Removing random items breaks the invariant so we have to re-establish it.
if (itr != event_queue.end()) {
event_queue.erase(itr, event_queue.end());
std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
// Removing random items breaks the invariant so we have to re-establish it.
if (itr != event_queue.end()) {
event_queue.erase(itr, event_queue.end());
std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
}
}
// Force any in-progress events to finish
if (wait) {
std::scoped_lock lk{advance_lock};
}
}
@@ -190,20 +198,6 @@ u64 CoreTiming::GetClockTicks() const {
return CpuCyclesToClockCycles(ticks);
}
void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
std::scoped_lock lock{basic_lock};
const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
return e.type.lock().get() == event_type.get();
});
// Removing random items breaks the invariant so we have to re-establish it.
if (itr != event_queue.end()) {
event_queue.erase(itr, event_queue.end());
std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
}
}
std::optional<s64> CoreTiming::Advance() {
std::scoped_lock lock{advance_lock, basic_lock};
global_timer = GetGlobalTimeNs().count();

View File

@@ -98,10 +98,13 @@ public:
const std::shared_ptr<EventType>& event_type,
std::uintptr_t user_data = 0, bool absolute_time = false);
void UnscheduleEvent(const std::shared_ptr<EventType>& event_type, std::uintptr_t user_data);
void UnscheduleEvent(const std::shared_ptr<EventType>& event_type, std::uintptr_t user_data,
bool wait = true);
/// We only permit one event of each type in the queue at a time.
void RemoveEvent(const std::shared_ptr<EventType>& event_type);
void UnscheduleEventWithoutWait(const std::shared_ptr<EventType>& event_type,
std::uintptr_t user_data) {
UnscheduleEvent(event_type, user_data, false);
}
void AddTicks(u64 ticks_to_add);

View File

@@ -1434,16 +1434,6 @@ AnalogSticks EmulatedController::GetSticks() const {
return {};
}
// Some drivers like stick from buttons need constant refreshing
for (auto& device : stick_devices) {
if (!device) {
continue;
}
lock.unlock();
device->SoftUpdate();
lock.lock();
}
return controller.analog_stick_state;
}

View File

@@ -27,13 +27,13 @@ Result KCodeMemory::Initialize(Core::DeviceMemory& device_memory, VAddr addr, si
auto& page_table = m_owner->PageTable();
// Construct the page group.
m_page_group = {};
m_page_group.emplace(kernel, page_table.GetBlockInfoManager());
// Lock the memory.
R_TRY(page_table.LockForCodeMemory(&m_page_group, addr, size))
R_TRY(page_table.LockForCodeMemory(std::addressof(*m_page_group), addr, size))
// Clear the memory.
for (const auto& block : m_page_group.Nodes()) {
for (const auto& block : *m_page_group) {
std::memset(device_memory.GetPointer<void>(block.GetAddress()), 0xFF, block.GetSize());
}
@@ -51,12 +51,13 @@ Result KCodeMemory::Initialize(Core::DeviceMemory& device_memory, VAddr addr, si
void KCodeMemory::Finalize() {
// Unlock.
if (!m_is_mapped && !m_is_owner_mapped) {
const size_t size = m_page_group.GetNumPages() * PageSize;
m_owner->PageTable().UnlockForCodeMemory(m_address, size, m_page_group);
const size_t size = m_page_group->GetNumPages() * PageSize;
m_owner->PageTable().UnlockForCodeMemory(m_address, size, *m_page_group);
}
// Close the page group.
m_page_group = {};
m_page_group->Close();
m_page_group->Finalize();
// Close our reference to our owner.
m_owner->Close();
@@ -64,7 +65,7 @@ void KCodeMemory::Finalize() {
Result KCodeMemory::Map(VAddr address, size_t size) {
// Validate the size.
R_UNLESS(m_page_group.GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
// Lock ourselves.
KScopedLightLock lk(m_lock);
@@ -74,7 +75,7 @@ Result KCodeMemory::Map(VAddr address, size_t size) {
// Map the memory.
R_TRY(kernel.CurrentProcess()->PageTable().MapPages(
address, m_page_group, KMemoryState::CodeOut, KMemoryPermission::UserReadWrite));
address, *m_page_group, KMemoryState::CodeOut, KMemoryPermission::UserReadWrite));
// Mark ourselves as mapped.
m_is_mapped = true;
@@ -84,13 +85,13 @@ Result KCodeMemory::Map(VAddr address, size_t size) {
Result KCodeMemory::Unmap(VAddr address, size_t size) {
// Validate the size.
R_UNLESS(m_page_group.GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
// Lock ourselves.
KScopedLightLock lk(m_lock);
// Unmap the memory.
R_TRY(kernel.CurrentProcess()->PageTable().UnmapPages(address, m_page_group,
R_TRY(kernel.CurrentProcess()->PageTable().UnmapPages(address, *m_page_group,
KMemoryState::CodeOut));
// Mark ourselves as unmapped.
@@ -101,7 +102,7 @@ Result KCodeMemory::Unmap(VAddr address, size_t size) {
Result KCodeMemory::MapToOwner(VAddr address, size_t size, Svc::MemoryPermission perm) {
// Validate the size.
R_UNLESS(m_page_group.GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
// Lock ourselves.
KScopedLightLock lk(m_lock);
@@ -125,7 +126,7 @@ Result KCodeMemory::MapToOwner(VAddr address, size_t size, Svc::MemoryPermission
// Map the memory.
R_TRY(
m_owner->PageTable().MapPages(address, m_page_group, KMemoryState::GeneratedCode, k_perm));
m_owner->PageTable().MapPages(address, *m_page_group, KMemoryState::GeneratedCode, k_perm));
// Mark ourselves as mapped.
m_is_owner_mapped = true;
@@ -135,13 +136,13 @@ Result KCodeMemory::MapToOwner(VAddr address, size_t size, Svc::MemoryPermission
Result KCodeMemory::UnmapFromOwner(VAddr address, size_t size) {
// Validate the size.
R_UNLESS(m_page_group.GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
R_UNLESS(m_page_group->GetNumPages() == Common::DivideUp(size, PageSize), ResultInvalidSize);
// Lock ourselves.
KScopedLightLock lk(m_lock);
// Unmap the memory.
R_TRY(m_owner->PageTable().UnmapPages(address, m_page_group, KMemoryState::GeneratedCode));
R_TRY(m_owner->PageTable().UnmapPages(address, *m_page_group, KMemoryState::GeneratedCode));
// Mark ourselves as unmapped.
m_is_owner_mapped = false;

View File

@@ -3,6 +3,8 @@
#pragma once
#include <optional>
#include "common/common_types.h"
#include "core/device_memory.h"
#include "core/hle/kernel/k_auto_object.h"
@@ -49,11 +51,11 @@ public:
return m_address;
}
size_t GetSize() const {
return m_is_initialized ? m_page_group.GetNumPages() * PageSize : 0;
return m_is_initialized ? m_page_group->GetNumPages() * PageSize : 0;
}
private:
KPageGroup m_page_group{};
std::optional<KPageGroup> m_page_group{};
KProcess* m_owner{};
VAddr m_address{};
KLightLock m_lock;

View File

@@ -18,7 +18,8 @@ void KHardwareTimer::Initialize() {
}
void KHardwareTimer::Finalize() {
this->DisableInterrupt();
m_kernel.System().CoreTiming().UnscheduleEvent(m_event_type, reinterpret_cast<uintptr_t>(this));
m_wakeup_time = std::numeric_limits<s64>::max();
m_event_type.reset();
}
@@ -59,7 +60,8 @@ void KHardwareTimer::EnableInterrupt(s64 wakeup_time) {
}
void KHardwareTimer::DisableInterrupt() {
m_kernel.System().CoreTiming().UnscheduleEvent(m_event_type, reinterpret_cast<uintptr_t>(this));
m_kernel.System().CoreTiming().UnscheduleEventWithoutWait(m_event_type,
reinterpret_cast<uintptr_t>(this));
m_wakeup_time = std::numeric_limits<s64>::max();
}

View File

@@ -223,7 +223,7 @@ Result KMemoryManager::AllocatePageGroupImpl(KPageGroup* out, size_t num_pages,
// Ensure that we don't leave anything un-freed.
ON_RESULT_FAILURE {
for (const auto& it : out->Nodes()) {
for (const auto& it : *out) {
auto& manager = this->GetManager(it.GetAddress());
const size_t node_num_pages = std::min<u64>(
it.GetNumPages(), (manager.GetEndAddress() - it.GetAddress()) / PageSize);
@@ -285,7 +285,7 @@ Result KMemoryManager::AllocateAndOpen(KPageGroup* out, size_t num_pages, u32 op
m_has_optimized_process[static_cast<size_t>(pool)], true));
// Open the first reference to the pages.
for (const auto& block : out->Nodes()) {
for (const auto& block : *out) {
PAddr cur_address = block.GetAddress();
size_t remaining_pages = block.GetNumPages();
while (remaining_pages > 0) {
@@ -335,7 +335,7 @@ Result KMemoryManager::AllocateForProcess(KPageGroup* out, size_t num_pages, u32
// Perform optimized memory tracking, if we should.
if (optimized) {
// Iterate over the allocated blocks.
for (const auto& block : out->Nodes()) {
for (const auto& block : *out) {
// Get the block extents.
const PAddr block_address = block.GetAddress();
const size_t block_pages = block.GetNumPages();
@@ -391,7 +391,7 @@ Result KMemoryManager::AllocateForProcess(KPageGroup* out, size_t num_pages, u32
}
} else {
// Set all the allocated memory.
for (const auto& block : out->Nodes()) {
for (const auto& block : *out) {
std::memset(m_system.DeviceMemory().GetPointer<void>(block.GetAddress()), fill_pattern,
block.GetSize());
}

View File

@@ -0,0 +1,121 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "core/hle/kernel/k_dynamic_resource_manager.h"
#include "core/hle/kernel/k_memory_manager.h"
#include "core/hle/kernel/k_page_group.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/svc_results.h"
namespace Kernel {
void KPageGroup::Finalize() {
KBlockInfo* cur = m_first_block;
while (cur != nullptr) {
KBlockInfo* next = cur->GetNext();
m_manager->Free(cur);
cur = next;
}
m_first_block = nullptr;
m_last_block = nullptr;
}
void KPageGroup::CloseAndReset() {
auto& mm = m_kernel.MemoryManager();
KBlockInfo* cur = m_first_block;
while (cur != nullptr) {
KBlockInfo* next = cur->GetNext();
mm.Close(cur->GetAddress(), cur->GetNumPages());
m_manager->Free(cur);
cur = next;
}
m_first_block = nullptr;
m_last_block = nullptr;
}
size_t KPageGroup::GetNumPages() const {
size_t num_pages = 0;
for (const auto& it : *this) {
num_pages += it.GetNumPages();
}
return num_pages;
}
Result KPageGroup::AddBlock(KPhysicalAddress addr, size_t num_pages) {
// Succeed immediately if we're adding no pages.
R_SUCCEED_IF(num_pages == 0);
// Check for overflow.
ASSERT(addr < addr + num_pages * PageSize);
// Try to just append to the last block.
if (m_last_block != nullptr) {
R_SUCCEED_IF(m_last_block->TryConcatenate(addr, num_pages));
}
// Allocate a new block.
KBlockInfo* new_block = m_manager->Allocate();
R_UNLESS(new_block != nullptr, ResultOutOfResource);
// Initialize the block.
new_block->Initialize(addr, num_pages);
// Add the block to our list.
if (m_last_block != nullptr) {
m_last_block->SetNext(new_block);
} else {
m_first_block = new_block;
}
m_last_block = new_block;
R_SUCCEED();
}
void KPageGroup::Open() const {
auto& mm = m_kernel.MemoryManager();
for (const auto& it : *this) {
mm.Open(it.GetAddress(), it.GetNumPages());
}
}
void KPageGroup::OpenFirst() const {
auto& mm = m_kernel.MemoryManager();
for (const auto& it : *this) {
mm.OpenFirst(it.GetAddress(), it.GetNumPages());
}
}
void KPageGroup::Close() const {
auto& mm = m_kernel.MemoryManager();
for (const auto& it : *this) {
mm.Close(it.GetAddress(), it.GetNumPages());
}
}
bool KPageGroup::IsEquivalentTo(const KPageGroup& rhs) const {
auto lit = this->begin();
auto rit = rhs.begin();
auto lend = this->end();
auto rend = rhs.end();
while (lit != lend && rit != rend) {
if (*lit != *rit) {
return false;
}
++lit;
++rit;
}
return lit == lend && rit == rend;
}
} // namespace Kernel

View File

@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -13,24 +13,23 @@
namespace Kernel {
class KBlockInfoManager;
class KernelCore;
class KPageGroup;
class KBlockInfo {
private:
friend class KPageGroup;
public:
constexpr KBlockInfo() = default;
constexpr explicit KBlockInfo() : m_next(nullptr) {}
constexpr void Initialize(PAddr addr, size_t np) {
constexpr void Initialize(KPhysicalAddress addr, size_t np) {
ASSERT(Common::IsAligned(addr, PageSize));
ASSERT(static_cast<u32>(np) == np);
m_page_index = static_cast<u32>(addr) / PageSize;
m_page_index = static_cast<u32>(addr / PageSize);
m_num_pages = static_cast<u32>(np);
}
constexpr PAddr GetAddress() const {
constexpr KPhysicalAddress GetAddress() const {
return m_page_index * PageSize;
}
constexpr size_t GetNumPages() const {
@@ -39,10 +38,10 @@ public:
constexpr size_t GetSize() const {
return this->GetNumPages() * PageSize;
}
constexpr PAddr GetEndAddress() const {
constexpr KPhysicalAddress GetEndAddress() const {
return (m_page_index + m_num_pages) * PageSize;
}
constexpr PAddr GetLastAddress() const {
constexpr KPhysicalAddress GetLastAddress() const {
return this->GetEndAddress() - 1;
}
@@ -62,8 +61,8 @@ public:
return !(*this == rhs);
}
constexpr bool IsStrictlyBefore(PAddr addr) const {
const PAddr end = this->GetEndAddress();
constexpr bool IsStrictlyBefore(KPhysicalAddress addr) const {
const KPhysicalAddress end = this->GetEndAddress();
if (m_page_index != 0 && end == 0) {
return false;
@@ -72,11 +71,11 @@ public:
return end < addr;
}
constexpr bool operator<(PAddr addr) const {
constexpr bool operator<(KPhysicalAddress addr) const {
return this->IsStrictlyBefore(addr);
}
constexpr bool TryConcatenate(PAddr addr, size_t np) {
constexpr bool TryConcatenate(KPhysicalAddress addr, size_t np) {
if (addr != 0 && addr == this->GetEndAddress()) {
m_num_pages += static_cast<u32>(np);
return true;
@@ -90,96 +89,118 @@ private:
}
private:
friend class KPageGroup;
KBlockInfo* m_next{};
u32 m_page_index{};
u32 m_num_pages{};
};
static_assert(sizeof(KBlockInfo) <= 0x10);
class KPageGroup final {
class KPageGroup {
public:
class Node final {
class Iterator {
public:
constexpr Node(u64 addr_, std::size_t num_pages_) : addr{addr_}, num_pages{num_pages_} {}
using iterator_category = std::forward_iterator_tag;
using value_type = const KBlockInfo;
using difference_type = std::ptrdiff_t;
using pointer = value_type*;
using reference = value_type&;
constexpr u64 GetAddress() const {
return addr;
constexpr explicit Iterator(pointer n) : m_node(n) {}
constexpr bool operator==(const Iterator& rhs) const {
return m_node == rhs.m_node;
}
constexpr bool operator!=(const Iterator& rhs) const {
return !(*this == rhs);
}
constexpr std::size_t GetNumPages() const {
return num_pages;
constexpr pointer operator->() const {
return m_node;
}
constexpr reference operator*() const {
return *m_node;
}
constexpr std::size_t GetSize() const {
return GetNumPages() * PageSize;
constexpr Iterator& operator++() {
m_node = m_node->GetNext();
return *this;
}
constexpr Iterator operator++(int) {
const Iterator it{*this};
++(*this);
return it;
}
private:
u64 addr{};
std::size_t num_pages{};
pointer m_node{};
};
public:
KPageGroup() = default;
KPageGroup(u64 address, u64 num_pages) {
ASSERT(AddBlock(address, num_pages).IsSuccess());
explicit KPageGroup(KernelCore& kernel, KBlockInfoManager* m)
: m_kernel{kernel}, m_manager{m} {}
~KPageGroup() {
this->Finalize();
}
constexpr std::list<Node>& Nodes() {
return nodes;
void CloseAndReset();
void Finalize();
Iterator begin() const {
return Iterator{m_first_block};
}
Iterator end() const {
return Iterator{nullptr};
}
bool empty() const {
return m_first_block == nullptr;
}
constexpr const std::list<Node>& Nodes() const {
return nodes;
Result AddBlock(KPhysicalAddress addr, size_t num_pages);
void Open() const;
void OpenFirst() const;
void Close() const;
size_t GetNumPages() const;
bool IsEquivalentTo(const KPageGroup& rhs) const;
bool operator==(const KPageGroup& rhs) const {
return this->IsEquivalentTo(rhs);
}
std::size_t GetNumPages() const {
std::size_t num_pages = 0;
for (const Node& node : nodes) {
num_pages += node.GetNumPages();
}
return num_pages;
bool operator!=(const KPageGroup& rhs) const {
return !(*this == rhs);
}
bool IsEqual(KPageGroup& other) const {
auto this_node = nodes.begin();
auto other_node = other.nodes.begin();
while (this_node != nodes.end() && other_node != other.nodes.end()) {
if (this_node->GetAddress() != other_node->GetAddress() ||
this_node->GetNumPages() != other_node->GetNumPages()) {
return false;
}
this_node = std::next(this_node);
other_node = std::next(other_node);
}
return this_node == nodes.end() && other_node == other.nodes.end();
}
Result AddBlock(u64 address, u64 num_pages) {
if (!num_pages) {
return ResultSuccess;
}
if (!nodes.empty()) {
const auto node = nodes.back();
if (node.GetAddress() + node.GetNumPages() * PageSize == address) {
address = node.GetAddress();
num_pages += node.GetNumPages();
nodes.pop_back();
}
}
nodes.push_back({address, num_pages});
return ResultSuccess;
}
bool Empty() const {
return nodes.empty();
}
void Finalize() {}
private:
std::list<Node> nodes;
KernelCore& m_kernel;
KBlockInfo* m_first_block{};
KBlockInfo* m_last_block{};
KBlockInfoManager* m_manager{};
};
class KScopedPageGroup {
public:
explicit KScopedPageGroup(const KPageGroup* gp) : m_pg(gp) {
if (m_pg) {
m_pg->Open();
}
}
explicit KScopedPageGroup(const KPageGroup& gp) : KScopedPageGroup(std::addressof(gp)) {}
~KScopedPageGroup() {
if (m_pg) {
m_pg->Close();
}
}
void CancelClose() {
m_pg = nullptr;
}
private:
const KPageGroup* m_pg{};
};
} // namespace Kernel

View File

@@ -100,7 +100,7 @@ constexpr size_t GetAddressSpaceWidthFromType(FileSys::ProgramAddressSpaceType a
KPageTable::KPageTable(Core::System& system_)
: m_general_lock{system_.Kernel()},
m_map_physical_memory_lock{system_.Kernel()}, m_system{system_} {}
m_map_physical_memory_lock{system_.Kernel()}, m_system{system_}, m_kernel{system_.Kernel()} {}
KPageTable::~KPageTable() = default;
@@ -373,7 +373,7 @@ Result KPageTable::MapProcessCode(VAddr addr, size_t num_pages, KMemoryState sta
m_memory_block_slab_manager);
// Allocate and open.
KPageGroup pg;
KPageGroup pg{m_kernel, m_block_info_manager};
R_TRY(m_system.Kernel().MemoryManager().AllocateAndOpen(
&pg, num_pages,
KMemoryManager::EncodeOption(KMemoryManager::Pool::Application, m_allocation_option)));
@@ -432,7 +432,7 @@ Result KPageTable::MapCodeMemory(VAddr dst_address, VAddr src_address, size_t si
const size_t num_pages = size / PageSize;
// Create page groups for the memory being mapped.
KPageGroup pg;
KPageGroup pg{m_kernel, m_block_info_manager};
AddRegionToPages(src_address, num_pages, pg);
// Reprotect the source as kernel-read/not mapped.
@@ -593,7 +593,7 @@ Result KPageTable::MakePageGroup(KPageGroup& pg, VAddr addr, size_t num_pages) {
const size_t size = num_pages * PageSize;
// We're making a new group, not adding to an existing one.
R_UNLESS(pg.Empty(), ResultInvalidCurrentMemory);
R_UNLESS(pg.empty(), ResultInvalidCurrentMemory);
// Begin traversal.
Common::PageTable::TraversalContext context;
@@ -640,11 +640,10 @@ Result KPageTable::MakePageGroup(KPageGroup& pg, VAddr addr, size_t num_pages) {
R_SUCCEED();
}
bool KPageTable::IsValidPageGroup(const KPageGroup& pg_ll, VAddr addr, size_t num_pages) {
bool KPageTable::IsValidPageGroup(const KPageGroup& pg, VAddr addr, size_t num_pages) {
ASSERT(this->IsLockedByCurrentThread());
const size_t size = num_pages * PageSize;
const auto& pg = pg_ll.Nodes();
const auto& memory_layout = m_system.Kernel().MemoryLayout();
// Empty groups are necessarily invalid.
@@ -942,9 +941,6 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add
ON_RESULT_FAILURE {
if (cur_mapped_addr != dst_addr) {
// HACK: Manually close the pages.
HACK_ClosePages(dst_addr, (cur_mapped_addr - dst_addr) / PageSize);
ASSERT(Operate(dst_addr, (cur_mapped_addr - dst_addr) / PageSize,
KMemoryPermission::None, OperationType::Unmap)
.IsSuccess());
@@ -1020,9 +1016,6 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add
// Map the page.
R_TRY(Operate(cur_mapped_addr, 1, test_perm, OperationType::Map, start_partial_page));
// HACK: Manually open the pages.
HACK_OpenPages(start_partial_page, 1);
// Update tracking extents.
cur_mapped_addr += PageSize;
cur_block_addr += PageSize;
@@ -1051,9 +1044,6 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add
R_TRY(Operate(cur_mapped_addr, cur_block_size / PageSize, test_perm, OperationType::Map,
cur_block_addr));
// HACK: Manually open the pages.
HACK_OpenPages(cur_block_addr, cur_block_size / PageSize);
// Update tracking extents.
cur_mapped_addr += cur_block_size;
cur_block_addr = next_entry.phys_addr;
@@ -1073,9 +1063,6 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add
R_TRY(Operate(cur_mapped_addr, last_block_size / PageSize, test_perm, OperationType::Map,
cur_block_addr));
// HACK: Manually open the pages.
HACK_OpenPages(cur_block_addr, last_block_size / PageSize);
// Update tracking extents.
cur_mapped_addr += last_block_size;
cur_block_addr += last_block_size;
@@ -1107,9 +1094,6 @@ Result KPageTable::SetupForIpcServer(VAddr* out_addr, size_t size, VAddr src_add
// Map the page.
R_TRY(Operate(cur_mapped_addr, 1, test_perm, OperationType::Map, end_partial_page));
// HACK: Manually open the pages.
HACK_OpenPages(end_partial_page, 1);
}
// Update memory blocks to reflect our changes
@@ -1211,9 +1195,6 @@ Result KPageTable::CleanupForIpcServer(VAddr address, size_t size, KMemoryState
const size_t aligned_size = aligned_end - aligned_start;
const size_t aligned_num_pages = aligned_size / PageSize;
// HACK: Manually close the pages.
HACK_ClosePages(aligned_start, aligned_num_pages);
// Unmap the pages.
R_TRY(Operate(aligned_start, aligned_num_pages, KMemoryPermission::None, OperationType::Unmap));
@@ -1501,17 +1482,6 @@ void KPageTable::CleanupForIpcClientOnServerSetupFailure([[maybe_unused]] PageLi
}
}
void KPageTable::HACK_OpenPages(PAddr phys_addr, size_t num_pages) {
m_system.Kernel().MemoryManager().OpenFirst(phys_addr, num_pages);
}
void KPageTable::HACK_ClosePages(VAddr virt_addr, size_t num_pages) {
for (size_t index = 0; index < num_pages; ++index) {
const auto paddr = GetPhysicalAddr(virt_addr + (index * PageSize));
m_system.Kernel().MemoryManager().Close(paddr, 1);
}
}
Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) {
// Lock the physical memory lock.
KScopedLightLock phys_lk(m_map_physical_memory_lock);
@@ -1572,7 +1542,7 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) {
R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached);
// Allocate pages for the new memory.
KPageGroup pg;
KPageGroup pg{m_kernel, m_block_info_manager};
R_TRY(m_system.Kernel().MemoryManager().AllocateForProcess(
&pg, (size - mapped_size) / PageSize, m_allocate_option, 0, 0));
@@ -1650,7 +1620,7 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) {
KScopedPageTableUpdater updater(this);
// Prepare to iterate over the memory.
auto pg_it = pg.Nodes().begin();
auto pg_it = pg.begin();
PAddr pg_phys_addr = pg_it->GetAddress();
size_t pg_pages = pg_it->GetNumPages();
@@ -1680,9 +1650,6 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) {
last_unmap_address + 1 - cur_address) /
PageSize;
// HACK: Manually close the pages.
HACK_ClosePages(cur_address, cur_pages);
// Unmap.
ASSERT(Operate(cur_address, cur_pages, KMemoryPermission::None,
OperationType::Unmap)
@@ -1703,7 +1670,7 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) {
// Release any remaining unmapped memory.
m_system.Kernel().MemoryManager().OpenFirst(pg_phys_addr, pg_pages);
m_system.Kernel().MemoryManager().Close(pg_phys_addr, pg_pages);
for (++pg_it; pg_it != pg.Nodes().end(); ++pg_it) {
for (++pg_it; pg_it != pg.end(); ++pg_it) {
m_system.Kernel().MemoryManager().OpenFirst(pg_it->GetAddress(),
pg_it->GetNumPages());
m_system.Kernel().MemoryManager().Close(pg_it->GetAddress(),
@@ -1731,7 +1698,7 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) {
// Check if we're at the end of the physical block.
if (pg_pages == 0) {
// Ensure there are more pages to map.
ASSERT(pg_it != pg.Nodes().end());
ASSERT(pg_it != pg.end());
// Advance our physical block.
++pg_it;
@@ -1742,10 +1709,7 @@ Result KPageTable::MapPhysicalMemory(VAddr address, size_t size) {
// Map whatever we can.
const size_t cur_pages = std::min(pg_pages, map_pages);
R_TRY(Operate(cur_address, cur_pages, KMemoryPermission::UserReadWrite,
OperationType::Map, pg_phys_addr));
// HACK: Manually open the pages.
HACK_OpenPages(pg_phys_addr, cur_pages);
OperationType::MapFirst, pg_phys_addr));
// Advance.
cur_address += cur_pages * PageSize;
@@ -1888,9 +1852,6 @@ Result KPageTable::UnmapPhysicalMemory(VAddr address, size_t size) {
last_address + 1 - cur_address) /
PageSize;
// HACK: Manually close the pages.
HACK_ClosePages(cur_address, cur_pages);
// Unmap.
ASSERT(Operate(cur_address, cur_pages, KMemoryPermission::None, OperationType::Unmap)
.IsSuccess());
@@ -1955,7 +1916,7 @@ Result KPageTable::MapMemory(VAddr dst_address, VAddr src_address, size_t size)
R_TRY(dst_allocator_result);
// Map the memory.
KPageGroup page_linked_list;
KPageGroup page_linked_list{m_kernel, m_block_info_manager};
const size_t num_pages{size / PageSize};
const KMemoryPermission new_src_perm = static_cast<KMemoryPermission>(
KMemoryPermission::KernelRead | KMemoryPermission::NotMapped);
@@ -2022,14 +1983,14 @@ Result KPageTable::UnmapMemory(VAddr dst_address, VAddr src_address, size_t size
num_dst_allocator_blocks);
R_TRY(dst_allocator_result);
KPageGroup src_pages;
KPageGroup dst_pages;
KPageGroup src_pages{m_kernel, m_block_info_manager};
KPageGroup dst_pages{m_kernel, m_block_info_manager};
const size_t num_pages{size / PageSize};
AddRegionToPages(src_address, num_pages, src_pages);
AddRegionToPages(dst_address, num_pages, dst_pages);
R_UNLESS(dst_pages.IsEqual(src_pages), ResultInvalidMemoryRegion);
R_UNLESS(dst_pages.IsEquivalentTo(src_pages), ResultInvalidMemoryRegion);
{
auto block_guard = detail::ScopeExit([&] { MapPages(dst_address, dst_pages, dst_perm); });
@@ -2060,7 +2021,7 @@ Result KPageTable::MapPages(VAddr addr, const KPageGroup& page_linked_list,
VAddr cur_addr{addr};
for (const auto& node : page_linked_list.Nodes()) {
for (const auto& node : page_linked_list) {
if (const auto result{
Operate(cur_addr, node.GetNumPages(), perm, OperationType::Map, node.GetAddress())};
result.IsError()) {
@@ -2160,7 +2121,7 @@ Result KPageTable::UnmapPages(VAddr addr, const KPageGroup& page_linked_list) {
VAddr cur_addr{addr};
for (const auto& node : page_linked_list.Nodes()) {
for (const auto& node : page_linked_list) {
if (const auto result{Operate(cur_addr, node.GetNumPages(), KMemoryPermission::None,
OperationType::Unmap)};
result.IsError()) {
@@ -2527,13 +2488,13 @@ Result KPageTable::SetHeapSize(VAddr* out, size_t size) {
R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached);
// Allocate pages for the heap extension.
KPageGroup pg;
KPageGroup pg{m_kernel, m_block_info_manager};
R_TRY(m_system.Kernel().MemoryManager().AllocateAndOpen(
&pg, allocation_size / PageSize,
KMemoryManager::EncodeOption(m_memory_pool, m_allocation_option)));
// Clear all the newly allocated pages.
for (const auto& it : pg.Nodes()) {
for (const auto& it : pg) {
std::memset(m_system.DeviceMemory().GetPointer<void>(it.GetAddress()), m_heap_fill_value,
it.GetSize());
}
@@ -2610,11 +2571,23 @@ ResultVal<VAddr> KPageTable::AllocateAndMapMemory(size_t needed_num_pages, size_
if (is_map_only) {
R_TRY(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr));
} else {
KPageGroup page_group;
R_TRY(m_system.Kernel().MemoryManager().AllocateForProcess(
&page_group, needed_num_pages,
KMemoryManager::EncodeOption(m_memory_pool, m_allocation_option), 0, 0));
R_TRY(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup));
// Create a page group tohold the pages we allocate.
KPageGroup pg{m_kernel, m_block_info_manager};
R_TRY(m_system.Kernel().MemoryManager().AllocateAndOpen(
&pg, needed_num_pages,
KMemoryManager::EncodeOption(m_memory_pool, m_allocation_option)));
// Ensure that the page group is closed when we're done working with it.
SCOPE_EXIT({ pg.Close(); });
// Clear all pages.
for (const auto& it : pg) {
std::memset(m_system.DeviceMemory().GetPointer<void>(it.GetAddress()),
m_heap_fill_value, it.GetSize());
}
R_TRY(Operate(addr, needed_num_pages, pg, OperationType::MapGroup));
}
// Update the blocks.
@@ -2795,19 +2768,28 @@ Result KPageTable::Operate(VAddr addr, size_t num_pages, const KPageGroup& page_
ASSERT(num_pages > 0);
ASSERT(num_pages == page_group.GetNumPages());
for (const auto& node : page_group.Nodes()) {
const size_t size{node.GetNumPages() * PageSize};
switch (operation) {
case OperationType::MapGroup: {
// We want to maintain a new reference to every page in the group.
KScopedPageGroup spg(page_group);
switch (operation) {
case OperationType::MapGroup:
for (const auto& node : page_group) {
const size_t size{node.GetNumPages() * PageSize};
// Map the pages.
m_system.Memory().MapMemoryRegion(*m_page_table_impl, addr, size, node.GetAddress());
break;
default:
ASSERT(false);
break;
addr += size;
}
addr += size;
// We succeeded! We want to persist the reference to the pages.
spg.CancelClose();
break;
}
default:
ASSERT(false);
break;
}
R_SUCCEED();
@@ -2822,13 +2804,29 @@ Result KPageTable::Operate(VAddr addr, size_t num_pages, KMemoryPermission perm,
ASSERT(ContainsPages(addr, num_pages));
switch (operation) {
case OperationType::Unmap:
case OperationType::Unmap: {
// Ensure that any pages we track close on exit.
KPageGroup pages_to_close{m_kernel, this->GetBlockInfoManager()};
SCOPE_EXIT({ pages_to_close.CloseAndReset(); });
this->AddRegionToPages(addr, num_pages, pages_to_close);
m_system.Memory().UnmapRegion(*m_page_table_impl, addr, num_pages * PageSize);
break;
}
case OperationType::MapFirst:
case OperationType::Map: {
ASSERT(map_addr);
ASSERT(Common::IsAligned(map_addr, PageSize));
m_system.Memory().MapMemoryRegion(*m_page_table_impl, addr, num_pages * PageSize, map_addr);
// Open references to pages, if we should.
if (IsHeapPhysicalAddress(m_kernel.MemoryLayout(), map_addr)) {
if (operation == OperationType::MapFirst) {
m_kernel.MemoryManager().OpenFirst(map_addr, num_pages);
} else {
m_kernel.MemoryManager().Open(map_addr, num_pages);
}
}
break;
}
case OperationType::Separate: {

View File

@@ -107,6 +107,10 @@ public:
return *m_page_table_impl;
}
KBlockInfoManager* GetBlockInfoManager() {
return m_block_info_manager;
}
bool CanContain(VAddr addr, size_t size, KMemoryState state) const;
protected:
@@ -261,10 +265,6 @@ private:
void CleanupForIpcClientOnServerSetupFailure(PageLinkedList* page_list, VAddr address,
size_t size, KMemoryPermission prot_perm);
// HACK: These will be removed once we automatically manage page reference counts.
void HACK_OpenPages(PAddr phys_addr, size_t num_pages);
void HACK_ClosePages(VAddr virt_addr, size_t num_pages);
mutable KLightLock m_general_lock;
mutable KLightLock m_map_physical_memory_lock;
@@ -488,6 +488,7 @@ private:
std::unique_ptr<Common::PageTable> m_page_table_impl;
Core::System& m_system;
KernelCore& m_kernel;
};
} // namespace Kernel

View File

@@ -13,10 +13,7 @@
namespace Kernel {
KSharedMemory::KSharedMemory(KernelCore& kernel_) : KAutoObjectWithSlabHeapAndContainer{kernel_} {}
KSharedMemory::~KSharedMemory() {
kernel.GetSystemResourceLimit()->Release(LimitableResource::PhysicalMemoryMax, size);
}
KSharedMemory::~KSharedMemory() = default;
Result KSharedMemory::Initialize(Core::DeviceMemory& device_memory_, KProcess* owner_process_,
Svc::MemoryPermission owner_permission_,
@@ -49,7 +46,8 @@ Result KSharedMemory::Initialize(Core::DeviceMemory& device_memory_, KProcess* o
R_UNLESS(physical_address != 0, ResultOutOfMemory);
//! Insert the result into our page group.
page_group.emplace(physical_address, num_pages);
page_group.emplace(kernel, &kernel.GetSystemSystemResource().GetBlockInfoManager());
page_group->AddBlock(physical_address, num_pages);
// Commit our reservation.
memory_reservation.Commit();
@@ -62,7 +60,7 @@ Result KSharedMemory::Initialize(Core::DeviceMemory& device_memory_, KProcess* o
is_initialized = true;
// Clear all pages in the memory.
for (const auto& block : page_group->Nodes()) {
for (const auto& block : *page_group) {
std::memset(device_memory_.GetPointer<void>(block.GetAddress()), 0, block.GetSize());
}
@@ -71,13 +69,8 @@ Result KSharedMemory::Initialize(Core::DeviceMemory& device_memory_, KProcess* o
void KSharedMemory::Finalize() {
// Close and finalize the page group.
// page_group->Close();
// page_group->Finalize();
//! HACK: Manually close.
for (const auto& block : page_group->Nodes()) {
kernel.MemoryManager().Close(block.GetAddress(), block.GetNumPages());
}
page_group->Close();
page_group->Finalize();
// Release the memory reservation.
resource_limit->Release(LimitableResource::PhysicalMemoryMax, size);

View File

@@ -14,4 +14,7 @@ constexpr std::size_t PageSize{1 << PageBits};
using Page = std::array<u8, PageSize>;
using KPhysicalAddress = PAddr;
using KProcessAddress = VAddr;
} // namespace Kernel

View File

@@ -1485,7 +1485,7 @@ static Result MapProcessMemory(Core::System& system, VAddr dst_address, Handle p
ResultInvalidMemoryRegion);
// Create a new page group.
KPageGroup pg;
KPageGroup pg{system.Kernel(), dst_pt.GetBlockInfoManager()};
R_TRY(src_pt.MakeAndOpenPageGroup(
std::addressof(pg), src_address, size / PageSize, KMemoryState::FlagCanMapProcess,
KMemoryState::FlagCanMapProcess, KMemoryPermission::None, KMemoryPermission::None,

View File

@@ -312,8 +312,6 @@ void NVFlinger::Compose() {
}
s64 NVFlinger::GetNextTicks() const {
static constexpr s64 max_hertz = 120LL;
const auto& settings = Settings::values;
auto speed_scale = 1.f;
if (settings.use_multi_core.GetValue()) {
@@ -327,9 +325,11 @@ s64 NVFlinger::GetNextTicks() const {
}
}
const auto next_ticks = ((1000000000 * (1LL << swap_interval)) / max_hertz);
// As an extension, treat nonpositive swap interval as framerate multiplier.
const f32 effective_fps = swap_interval <= 0 ? 120.f * static_cast<f32>(1 - swap_interval)
: 60.f / static_cast<f32>(swap_interval);
return static_cast<s64>(speed_scale * static_cast<float>(next_ticks));
return static_cast<s64>(speed_scale * (1000000000.f / effective_fps));
}
} // namespace Service::NVFlinger

View File

@@ -133,7 +133,7 @@ private:
/// layers.
u32 next_buffer_queue_id = 1;
u32 swap_interval = 1;
s32 swap_interval = 1;
/// Event that handles screen composition.
std::shared_ptr<Core::Timing::EventType> multi_composition_event;

View File

@@ -117,6 +117,8 @@ Errno TranslateNativeError(int e) {
return Errno::NETUNREACH;
case WSAEMSGSIZE:
return Errno::MSGSIZE;
case WSAETIMEDOUT:
return Errno::TIMEDOUT;
default:
UNIMPLEMENTED_MSG("Unimplemented errno={}", e);
return Errno::OTHER;
@@ -211,6 +213,8 @@ Errno TranslateNativeError(int e) {
return Errno::NETUNREACH;
case EMSGSIZE:
return Errno::MSGSIZE;
case ETIMEDOUT:
return Errno::TIMEDOUT;
default:
UNIMPLEMENTED_MSG("Unimplemented errno={}", e);
return Errno::OTHER;
@@ -226,7 +230,7 @@ Errno GetAndLogLastError() {
int e = errno;
#endif
const Errno err = TranslateNativeError(e);
if (err == Errno::AGAIN) {
if (err == Errno::AGAIN || err == Errno::TIMEDOUT) {
return err;
}
LOG_ERROR(Network, "Socket operation error: {}", Common::NativeErrorToString(e));

View File

@@ -436,7 +436,7 @@ struct Memory::Impl {
}
if (Settings::IsFastmemEnabled()) {
const bool is_read_enable = Settings::IsGPULevelHigh() || !cached;
const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached;
system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
}

View File

@@ -156,10 +156,12 @@ void Tas::RecordInput(u64 buttons, TasAnalog left_axis, TasAnalog right_axis) {
};
}
std::tuple<TasState, size_t, size_t> Tas::GetStatus() const {
std::tuple<TasState, size_t, std::array<size_t, PLAYER_NUMBER>> Tas::GetStatus() const {
TasState state;
std::array<size_t, PLAYER_NUMBER> lengths{0};
if (is_recording) {
return {TasState::Recording, 0, record_commands.size()};
lengths[0] = record_commands.size();
return {TasState::Recording, record_commands.size(), lengths};
}
if (is_running) {
@@ -168,7 +170,11 @@ std::tuple<TasState, size_t, size_t> Tas::GetStatus() const {
state = TasState::Stopped;
}
return {state, current_command, script_length};
for (size_t i = 0; i < PLAYER_NUMBER; i++) {
lengths[i] = commands[i].size();
}
return {state, current_command, lengths};
}
void Tas::UpdateThread() {

View File

@@ -124,7 +124,7 @@ public:
* Current playback progress ;
* Total length of script file currently loaded or being recorded
*/
std::tuple<TasState, size_t, size_t> GetStatus() const;
std::tuple<TasState, size_t, std::array<size_t, PLAYER_NUMBER>> GetStatus() const;
private:
enum class TasAxis : u8;

View File

@@ -13,11 +13,11 @@ class Stick final : public Common::Input::InputDevice {
public:
using Button = std::unique_ptr<Common::Input::InputDevice>;
Stick(Button up_, Button down_, Button left_, Button right_, Button modifier_,
Stick(Button up_, Button down_, Button left_, Button right_, Button modifier_, Button updater_,
float modifier_scale_, float modifier_angle_)
: up(std::move(up_)), down(std::move(down_)), left(std::move(left_)),
right(std::move(right_)), modifier(std::move(modifier_)), modifier_scale(modifier_scale_),
modifier_angle(modifier_angle_) {
right(std::move(right_)), modifier(std::move(modifier_)), updater(std::move(updater_)),
modifier_scale(modifier_scale_), modifier_angle(modifier_angle_) {
up->SetCallback({
.on_change =
[this](const Common::Input::CallbackStatus& callback_) {
@@ -48,6 +48,9 @@ public:
UpdateModButtonStatus(callback_);
},
});
updater->SetCallback({
.on_change = [this](const Common::Input::CallbackStatus& callback_) { SoftUpdate(); },
});
last_x_axis_value = 0.0f;
last_y_axis_value = 0.0f;
}
@@ -248,7 +251,7 @@ public:
modifier->ForceUpdate();
}
void SoftUpdate() override {
void SoftUpdate() {
Common::Input::CallbackStatus status{
.type = Common::Input::InputType::Stick,
.stick_status = GetStatus(),
@@ -308,6 +311,7 @@ private:
Button left;
Button right;
Button modifier;
Button updater;
float modifier_scale{};
float modifier_angle{};
float angle{};
@@ -331,11 +335,12 @@ std::unique_ptr<Common::Input::InputDevice> StickFromButton::Create(
auto left = Common::Input::CreateInputDeviceFromString(params.Get("left", null_engine));
auto right = Common::Input::CreateInputDeviceFromString(params.Get("right", null_engine));
auto modifier = Common::Input::CreateInputDeviceFromString(params.Get("modifier", null_engine));
auto updater = Common::Input::CreateInputDeviceFromString("engine:updater,button:0");
auto modifier_scale = params.Get("modifier_scale", 0.5f);
auto modifier_angle = params.Get("modifier_angle", 5.5f);
return std::make_unique<Stick>(std::move(up), std::move(down), std::move(left),
std::move(right), std::move(modifier), modifier_scale,
modifier_angle);
std::move(right), std::move(modifier), std::move(updater),
modifier_scale, modifier_angle);
}
} // namespace InputCommon

View File

@@ -76,7 +76,7 @@ void MappingFactory::RegisterButton(const MappingData& data) {
break;
case EngineInputType::Analog:
// Ignore mouse axis when mapping buttons
if (data.engine == "mouse") {
if (data.engine == "mouse" && data.index != 4) {
return;
}
new_input.Set("axis", data.index);

View File

@@ -28,6 +28,28 @@
namespace InputCommon {
/// Dummy engine to get periodic updates
class UpdateEngine final : public InputEngine {
public:
explicit UpdateEngine(std::string input_engine_) : InputEngine(std::move(input_engine_)) {
PreSetController(identifier);
}
void PumpEvents() {
SetButton(identifier, 0, last_state);
last_state = !last_state;
}
private:
static constexpr PadIdentifier identifier = {
.guid = Common::UUID{},
.port = 0,
.pad = 0,
};
bool last_state{};
};
struct InputSubsystem::Impl {
template <typename Engine>
void RegisterEngine(std::string name, std::shared_ptr<Engine>& engine) {
@@ -45,6 +67,7 @@ struct InputSubsystem::Impl {
void Initialize() {
mapping_factory = std::make_shared<MappingFactory>();
RegisterEngine("updater", update_engine);
RegisterEngine("keyboard", keyboard);
RegisterEngine("mouse", mouse);
RegisterEngine("touch", touch_screen);
@@ -74,6 +97,7 @@ struct InputSubsystem::Impl {
}
void Shutdown() {
UnregisterEngine(update_engine);
UnregisterEngine(keyboard);
UnregisterEngine(mouse);
UnregisterEngine(touch_screen);
@@ -252,6 +276,7 @@ struct InputSubsystem::Impl {
}
void PumpEvents() const {
update_engine->PumpEvents();
#ifdef HAVE_SDL2
sdl->PumpEvents();
#endif
@@ -263,6 +288,7 @@ struct InputSubsystem::Impl {
std::shared_ptr<MappingFactory> mapping_factory;
std::shared_ptr<UpdateEngine> update_engine;
std::shared_ptr<Keyboard> keyboard;
std::shared_ptr<Mouse> mouse;
std::shared_ptr<TouchScreen> touch_screen;

View File

@@ -321,8 +321,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
case IR::Attribute::PositionY:
case IR::Attribute::PositionZ:
case IR::Attribute::PositionW:
return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position,
ctx.Const(element)));
return ctx.OpLoad(
ctx.F32[1],
ctx.need_input_position_indirect
? AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.u32_zero_value,
ctx.Const(element))
: AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.Const(element)));
case IR::Attribute::InstanceId:
if (ctx.profile.support_vertex_instance_id) {
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));

View File

@@ -58,11 +58,10 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
ctx.OpGroupNonUniformShuffle(ctx.U32[1], SubgroupScope(ctx), value, src_thread_id), value);
}
Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) {
const Id thirty_two{ctx.Const(32u)};
const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)};
const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
Id AddPartitionBase(EmitContext& ctx, Id thread_id) {
const Id partition_idx{ctx.OpShiftRightLogical(ctx.U32[1], GetThreadId(ctx), ctx.Const(5u))};
const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))};
return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base);
}
} // Anonymous namespace
@@ -145,64 +144,63 @@ Id EmitSubgroupGeMask(EmitContext& ctx) {
Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
const Id thread_id{GetThreadId(ctx)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
const Id thirty_two{ctx.Const(32u)};
const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)};
const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)};
const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index);
clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
}
const Id thread_id{EmitLaneId(ctx)};
const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)};
const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
src_thread_id = AddPartitionBase(ctx, src_thread_id);
}
SetInBoundsFlag(inst, in_range);
return SelectValue(ctx, in_range, value, src_thread_id);
}
Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id thread_id{GetThreadId(ctx)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
clamp = GetUpperClamp(ctx, thread_id, clamp);
}
const Id thread_id{EmitLaneId(ctx)};
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
src_thread_id = AddPartitionBase(ctx, src_thread_id);
}
SetInBoundsFlag(inst, in_range);
return SelectValue(ctx, in_range, value, src_thread_id);
}
Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id thread_id{GetThreadId(ctx)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
clamp = GetUpperClamp(ctx, thread_id, clamp);
}
const Id thread_id{EmitLaneId(ctx)};
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
src_thread_id = AddPartitionBase(ctx, src_thread_id);
}
SetInBoundsFlag(inst, in_range);
return SelectValue(ctx, in_range, value, src_thread_id);
}
Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id thread_id{GetThreadId(ctx)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
clamp = GetUpperClamp(ctx, thread_id, clamp);
}
const Id thread_id{EmitLaneId(ctx)};
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
src_thread_id = AddPartitionBase(ctx, src_thread_id);
}
SetInBoundsFlag(inst, in_range);
return SelectValue(ctx, in_range, value, src_thread_id);
}

View File

@@ -544,7 +544,7 @@ void EmitContext::DefineCommonTypes(const Info& info) {
U16 = Name(TypeInt(16, false), "u16");
S16 = Name(TypeInt(16, true), "s16");
}
if (info.uses_int64) {
if (info.uses_int64 && profile.support_int64) {
AddCapability(spv::Capability::Int64);
U64 = Name(TypeInt(64, false), "u64");
}
@@ -721,9 +721,21 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
size_t label_index{0};
if (info.loads.AnyComponent(IR::Attribute::PositionX)) {
AddLabel(labels[label_index]);
const Id pointer{is_array
? OpAccessChain(input_f32, input_position, vertex, masked_index)
: OpAccessChain(input_f32, input_position, masked_index)};
const Id pointer{[&]() {
if (need_input_position_indirect) {
if (is_array)
return OpAccessChain(input_f32, input_position, vertex, u32_zero_value,
masked_index);
else
return OpAccessChain(input_f32, input_position, u32_zero_value,
masked_index);
} else {
if (is_array)
return OpAccessChain(input_f32, input_position, vertex, masked_index);
else
return OpAccessChain(input_f32, input_position, masked_index);
}
}()};
const Id result{OpLoad(F32[1], pointer)};
OpReturnValue(result);
++label_index;
@@ -1367,12 +1379,25 @@ void EmitContext::DefineInputs(const IR::Program& program) {
Decorate(layer, spv::Decoration::Flat);
}
if (loads.AnyComponent(IR::Attribute::PositionX)) {
const bool is_fragment{stage != Stage::Fragment};
const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord};
input_position = DefineInput(*this, F32[4], true, built_in);
if (profile.support_geometry_shader_passthrough) {
if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
Decorate(input_position, spv::Decoration::PassthroughNV);
const bool is_fragment{stage == Stage::Fragment};
if (!is_fragment && profile.has_broken_spirv_position_input) {
need_input_position_indirect = true;
const Id input_position_struct = TypeStruct(F32[4]);
input_position = DefineInput(*this, input_position_struct, true);
MemberDecorate(input_position_struct, 0, spv::Decoration::BuiltIn,
static_cast<unsigned>(spv::BuiltIn::Position));
Decorate(input_position_struct, spv::Decoration::Block);
} else {
const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord
: spv::BuiltIn::Position};
input_position = DefineInput(*this, F32[4], true, built_in);
if (profile.support_geometry_shader_passthrough) {
if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
Decorate(input_position, spv::Decoration::PassthroughNV);
}
}
}
}

View File

@@ -280,6 +280,7 @@ public:
Id write_global_func_u32x2{};
Id write_global_func_u32x4{};
bool need_input_position_indirect{};
Id input_position{};
std::array<Id, 32> input_generics{};

View File

@@ -171,6 +171,70 @@ std::map<IR::Attribute, IR::Attribute> GenerateLegacyToGenericMappings(
}
return mapping;
}
void EmitGeometryPassthrough(IR::IREmitter& ir, const IR::Program& program,
const Shader::VaryingState& passthrough_mask,
bool passthrough_position,
std::optional<IR::Attribute> passthrough_layer_attr) {
for (u32 i = 0; i < program.output_vertices; i++) {
// Assign generics from input
for (u32 j = 0; j < 32; j++) {
if (!passthrough_mask.Generic(j)) {
continue;
}
const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4);
ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
}
if (passthrough_position) {
// Assign position from input
const IR::Attribute attr = IR::Attribute::PositionX;
ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
}
if (passthrough_layer_attr) {
// Assign layer
ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(*passthrough_layer_attr),
ir.Imm32(0));
}
// Emit vertex
ir.EmitVertex(ir.Imm32(0));
}
ir.EndPrimitive(ir.Imm32(0));
}
u32 GetOutputTopologyVertices(OutputTopology output_topology) {
switch (output_topology) {
case OutputTopology::PointList:
return 1;
case OutputTopology::LineStrip:
return 2;
default:
return 3;
}
}
void LowerGeometryPassthrough(const IR::Program& program, const HostTranslateInfo& host_info) {
for (IR::Block* const block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) {
if (inst.GetOpcode() == IR::Opcode::Epilogue) {
IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
EmitGeometryPassthrough(
ir, program, program.info.passthrough,
program.info.passthrough.AnyComponent(IR::Attribute::PositionX), {});
}
}
}
}
} // Anonymous namespace
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
@@ -195,9 +259,14 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0;
if (program.is_geometry_passthrough) {
const auto& mask{env.GpPassthroughMask()};
for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) {
for (size_t i = 0; i < mask.size() * 32; ++i) {
program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
}
if (!host_info.support_geometry_shader_passthrough) {
program.output_vertices = GetOutputTopologyVertices(program.output_topology);
LowerGeometryPassthrough(program, host_info);
}
}
break;
}
@@ -223,7 +292,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
Optimization::PositionPass(env, program);
Optimization::GlobalMemoryToStorageBufferPass(program);
Optimization::GlobalMemoryToStorageBufferPass(program, host_info);
Optimization::TexturePass(env, program, host_info);
if (Settings::values.resolution_info.active) {
@@ -342,17 +411,7 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
IR::Program program;
program.stage = Stage::Geometry;
program.output_topology = output_topology;
switch (output_topology) {
case OutputTopology::PointList:
program.output_vertices = 1;
break;
case OutputTopology::LineStrip:
program.output_vertices = 2;
break;
default:
program.output_vertices = 3;
break;
}
program.output_vertices = GetOutputTopologyVertices(output_topology);
program.is_geometry_passthrough = false;
program.info.loads.mask = source_program.info.stores.mask;
@@ -366,35 +425,8 @@ IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
node.data.block = current_block;
IR::IREmitter ir{*current_block};
for (u32 i = 0; i < program.output_vertices; i++) {
// Assign generics from input
for (u32 j = 0; j < 32; j++) {
if (!program.info.stores.Generic(j)) {
continue;
}
const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4);
ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
}
// Assign position from input
const IR::Attribute attr = IR::Attribute::PositionX;
ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
// Assign layer
ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(source_program.info.emulated_layer),
ir.Imm32(0));
// Emit vertex
ir.EmitVertex(ir.Imm32(0));
}
ir.EndPrimitive(ir.Imm32(0));
EmitGeometryPassthrough(ir, program, program.info.stores, true,
source_program.info.emulated_layer);
IR::Block* return_block{block_pool.Create(inst_pool)};
IR::IREmitter{*return_block}.Epilogue();

View File

@@ -15,6 +15,9 @@ struct HostTranslateInfo {
bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
///< passthrough shaders
};
} // namespace Shader

View File

@@ -11,6 +11,7 @@
#include "shader_recompiler/frontend/ir/breadth_first_search.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/host_translate_info.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {
@@ -402,7 +403,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
}
/// Returns the offset in indices (not bytes) for an equivalent storage instruction
IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) {
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
IR::U32 offset;
if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
@@ -415,7 +416,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
}
// Subtract the least significant 32 bits from the guest offset. The result is the storage
// buffer offset in bytes.
const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
// Align the offset base to match the host alignment requirements
low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
return ir.ISub(offset, low_cbuf);
}
@@ -510,7 +514,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
}
} // Anonymous namespace
void GlobalMemoryToStorageBufferPass(IR::Program& program) {
void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) {
StorageInfo info;
for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) {
@@ -534,7 +538,8 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
IR::Block* const block{storage_inst.block};
IR::Inst* const inst{storage_inst.inst};
const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
const IR::U32 offset{
StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
Replace(*block, *inst, index, offset);
}
}

View File

@@ -15,7 +15,7 @@ namespace Shader::Optimization {
void CollectShaderInfoPass(Environment& env, IR::Program& program);
void ConstantPropagationPass(Environment& env, IR::Program& program);
void DeadCodeEliminationPass(IR::Program& program);
void GlobalMemoryToStorageBufferPass(IR::Program& program);
void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info);
void IdentityRemovalPass(IR::Program& program);
void LowerFp16ToFp32(IR::Program& program);
void LowerInt64ToInt32(IR::Program& program);

View File

@@ -55,6 +55,8 @@ struct Profile {
/// OpFClamp is broken and OpFMax + OpFMin should be used instead
bool has_broken_spirv_clamp{};
/// The Position builtin needs to be wrapped in a struct when used as an input
bool has_broken_spirv_position_input{};
/// Offset image operands with an unsigned type do not work
bool has_broken_unsigned_image_offsets{};
/// Signed instructions with unsigned data types are misinterpreted

View File

@@ -65,6 +65,8 @@ enum class Interpolation {
struct ConstantBufferDescriptor {
u32 index;
u32 count;
auto operator<=>(const ConstantBufferDescriptor&) const = default;
};
struct StorageBufferDescriptor {
@@ -72,6 +74,8 @@ struct StorageBufferDescriptor {
u32 cbuf_offset;
u32 count;
bool is_written;
auto operator<=>(const StorageBufferDescriptor&) const = default;
};
struct TextureBufferDescriptor {
@@ -84,6 +88,8 @@ struct TextureBufferDescriptor {
u32 secondary_shift_left;
u32 count;
u32 size_shift;
auto operator<=>(const TextureBufferDescriptor&) const = default;
};
using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>;
@@ -95,6 +101,8 @@ struct ImageBufferDescriptor {
u32 cbuf_offset;
u32 count;
u32 size_shift;
auto operator<=>(const ImageBufferDescriptor&) const = default;
};
using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>;
@@ -110,6 +118,8 @@ struct TextureDescriptor {
u32 secondary_shift_left;
u32 count;
u32 size_shift;
auto operator<=>(const TextureDescriptor&) const = default;
};
using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
@@ -122,6 +132,8 @@ struct ImageDescriptor {
u32 cbuf_offset;
u32 count;
u32 size_shift;
auto operator<=>(const ImageDescriptor&) const = default;
};
using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;

View File

@@ -538,7 +538,7 @@ TEST_CASE("BufferBase: Cached write downloads") {
int num = 0;
buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
REQUIRE(num == 0);
REQUIRE(num == 1);
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
buffer.FlushCachedWrites();

View File

@@ -85,6 +85,7 @@ add_library(video_core STATIC
gpu.h
gpu_thread.cpp
gpu_thread.h
invalidation_accumulator.h
memory_manager.cpp
memory_manager.h
precompiled_headers.h
@@ -190,6 +191,8 @@ add_library(video_core STATIC
renderer_vulkan/vk_texture_cache.cpp
renderer_vulkan/vk_texture_cache.h
renderer_vulkan/vk_texture_cache_base.cpp
renderer_vulkan/vk_turbo_mode.cpp
renderer_vulkan/vk_turbo_mode.h
renderer_vulkan/vk_update_descriptor.cpp
renderer_vulkan/vk_update_descriptor.h
shader_cache.cpp

View File

@@ -430,7 +430,7 @@ private:
if (query_begin >= SizeBytes() || size < 0) {
return;
}
u64* const untracked_words = Array<Type::Untracked>();
[[maybe_unused]] u64* const untracked_words = Array<Type::Untracked>();
u64* const state_words = Array<type>();
const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
@@ -483,7 +483,7 @@ private:
NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
}
// Exclude CPU modified pages when visiting GPU pages
const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
const u64 word = current_word;
u64 page = page_begin;
page_begin = 0;
@@ -531,7 +531,7 @@ private:
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
static_assert(type != Type::Untracked);
const u64* const untracked_words = Array<Type::Untracked>();
[[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
const u64* const state_words = Array<type>();
const u64 num_query_words = size / BYTES_PER_WORD + 1;
const u64 word_begin = offset / BYTES_PER_WORD;
@@ -539,8 +539,7 @@ private:
const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
const u64 word = state_words[word_index] & ~off_word;
const u64 word = state_words[word_index];
if (word == 0) {
continue;
}
@@ -564,7 +563,7 @@ private:
[[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
static_assert(type != Type::Untracked);
const u64* const untracked_words = Array<Type::Untracked>();
[[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
const u64* const state_words = Array<type>();
const u64 num_query_words = size / BYTES_PER_WORD + 1;
const u64 word_begin = offset / BYTES_PER_WORD;
@@ -574,8 +573,7 @@ private:
u64 begin = std::numeric_limits<u64>::max();
u64 end = 0;
for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
const u64 word = state_words[word_index] & ~off_word;
const u64 word = state_words[word_index];
if (word == 0) {
continue;
}

View File

@@ -1938,14 +1938,21 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
bool is_written) const {
const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
const u32 alignment = runtime.GetStorageBufferAlignment();
const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment);
const u32 aligned_size =
Common::AlignUp(static_cast<u32>(gpu_addr - aligned_gpu_addr) + size, alignment);
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr);
if (!cpu_addr || size == 0) {
return NULL_BINDING;
}
const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE);
const VAddr cpu_end = Common::AlignUp(*cpu_addr + aligned_size, Core::Memory::YUZU_PAGESIZE);
const Binding binding{
.cpu_addr = *cpu_addr,
.size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr),
.size = is_written ? aligned_size : static_cast<u32>(cpu_end - *cpu_addr),
.buffer_id = BufferId{},
};
return binding;

View File

@@ -76,7 +76,7 @@ void State::ProcessData(std::span<const u8> read_buffer) {
regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
x_elements, regs.line_count, regs.dest.BlockHeight(),
regs.dest.BlockDepth(), regs.line_length_in);
memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size);
}
}

View File

@@ -6,6 +6,7 @@
#include "common/microprofile.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/sw_blitter/blitter.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"
#include "video_core/textures/decoders.h"
@@ -20,8 +21,8 @@ namespace Tegra::Engines {
using namespace Texture;
Fermi2D::Fermi2D(MemoryManager& memory_manager_) {
sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_);
Fermi2D::Fermi2D(MemoryManager& memory_manager_) : memory_manager{memory_manager_} {
sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager);
// Nvidia's OpenGL driver seems to assume these values
regs.src.depth = 1;
regs.dst.depth = 1;
@@ -104,6 +105,7 @@ void Fermi2D::Blit() {
config.src_x0 = 0;
}
memory_manager.FlushCaching();
if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
sw_blitter->Blit(src, regs.dst, config);
}

View File

@@ -305,6 +305,7 @@ public:
private:
VideoCore::RasterizerInterface* rasterizer = nullptr;
std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter;
MemoryManager& memory_manager;
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.

View File

@@ -467,7 +467,7 @@ void Maxwell3D::ProcessMacroBind(u32 data) {
}
void Maxwell3D::ProcessFirmwareCall4() {
LOG_WARNING(HW_GPU, "(STUBBED) called");
LOG_DEBUG(HW_GPU, "(STUBBED) called");
// Firmware call 4 is a blob that changes some registers depending on its parameters.
// These registers don't affect emulation and so are stubbed by setting 0xd00 to 1.
@@ -485,11 +485,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
}
void Maxwell3D::ProcessQueryGet() {
// TODO(Subv): Support the other query units.
if (regs.report_semaphore.query.location != Regs::ReportSemaphore::Location::All) {
LOG_DEBUG(HW_GPU, "Locations other than ALL are unimplemented");
}
switch (regs.report_semaphore.query.operation) {
case Regs::ReportSemaphore::Operation::Release:
if (regs.report_semaphore.query.short_query != 0) {
@@ -649,7 +644,7 @@ void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) {
const GPUVAddr address{buffer_address + regs.const_buffer.offset};
const size_t copy_size = amount * sizeof(u32);
memory_manager.WriteBlock(address, start_base, copy_size);
memory_manager.WriteBlockCached(address, start_base, copy_size);
// Increment the current buffer position.
regs.const_buffer.offset += static_cast<u32>(copy_size);

View File

@@ -69,7 +69,7 @@ void MaxwellDMA::Launch() {
if (launch.multi_line_enable) {
const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
memory_manager.FlushCaching();
if (!is_src_pitch && !is_dst_pitch) {
// If both the source and the destination are in block layout, assert.
CopyBlockLinearToBlockLinear();
@@ -104,6 +104,7 @@ void MaxwellDMA::Launch() {
reinterpret_cast<u8*>(tmp_buffer.data()),
regs.line_length_in * sizeof(u32));
} else {
memory_manager.FlushCaching();
const auto convert_linear_2_blocklinear_addr = [](u64 address) {
return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
((address & 0x180) >> 1) | ((address & 0x20) << 3);
@@ -121,8 +122,8 @@ void MaxwellDMA::Launch() {
memory_manager.ReadBlockUnsafe(
convert_linear_2_blocklinear_addr(regs.offset_in + offset),
tmp_buffer.data(), tmp_buffer.size());
memory_manager.WriteBlock(regs.offset_out + offset, tmp_buffer.data(),
tmp_buffer.size());
memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
tmp_buffer.size());
}
} else if (is_src_pitch && !is_dst_pitch) {
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
@@ -132,7 +133,7 @@ void MaxwellDMA::Launch() {
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(),
tmp_buffer.size());
memory_manager.WriteBlock(
memory_manager.WriteBlockCached(
convert_linear_2_blocklinear_addr(regs.offset_out + offset),
tmp_buffer.data(), tmp_buffer.size());
}
@@ -141,8 +142,8 @@ void MaxwellDMA::Launch() {
std::vector<u8> tmp_buffer(regs.line_length_in);
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
regs.line_length_in);
memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(),
regs.line_length_in);
memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
regs.line_length_in);
}
}
}
@@ -204,7 +205,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
regs.pitch_out);
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::CopyPitchToBlockLinear() {
@@ -256,7 +257,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
regs.pitch_in);
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::FastCopyBlockLinearToPitch() {
@@ -287,7 +288,7 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
regs.src_params.block_size.height, regs.src_params.block_size.depth,
regs.pitch_out);
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::CopyBlockLinearToBlockLinear() {
@@ -347,7 +348,7 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() {
dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count,
dst.block_size.height, dst.block_size.depth, pitch);
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
}
void MaxwellDMA::ReleaseSemaphore() {

View File

@@ -47,6 +47,7 @@ set(SHADER_FILES
vulkan_present_scaleforce_fp16.frag
vulkan_present_scaleforce_fp32.frag
vulkan_quad_indexed.comp
vulkan_turbo_mode.comp
vulkan_uint8.comp
)

View File

@@ -0,0 +1,29 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#version 460 core
layout (local_size_x = 16, local_size_y = 8, local_size_z = 1) in;
layout (binding = 0) buffer ThreadData {
uint data[];
};
uint xorshift32(uint x) {
x ^= x << 13;
x ^= x >> 17;
x ^= x << 5;
return x;
}
uint getGlobalIndex() {
return gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * gl_WorkGroupSize.y * gl_NumWorkGroups.y;
}
void main() {
uint myIndex = xorshift32(getGlobalIndex());
uint otherIndex = xorshift32(myIndex);
uint otherValue = atomicAdd(data[otherIndex % data.length()], 0) + 1;
atomicAdd(data[myIndex % data.length()], otherValue);
}

View File

@@ -0,0 +1,79 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <utility>
#include <vector>
#include "common/common_types.h"
namespace VideoCommon {
class InvalidationAccumulator {
public:
InvalidationAccumulator() = default;
~InvalidationAccumulator() = default;
void Add(GPUVAddr address, size_t size) {
const auto reset_values = [&]() {
if (has_collected) {
buffer.emplace_back(start_address, accumulated_size);
}
start_address = address;
accumulated_size = size;
last_collection = start_address + size;
};
if (address >= start_address && address + size <= last_collection) [[likely]] {
return;
}
size = ((address + size + atomicity_size_mask) & atomicity_mask) - address;
address = address & atomicity_mask;
if (!has_collected) [[unlikely]] {
reset_values();
has_collected = true;
return;
}
if (address != last_collection) [[unlikely]] {
reset_values();
return;
}
accumulated_size += size;
last_collection += size;
}
void Clear() {
buffer.clear();
start_address = 0;
last_collection = 0;
has_collected = false;
}
bool AnyAccumulated() const {
return has_collected;
}
template <typename Func>
void Callback(Func&& func) {
if (!has_collected) {
return;
}
buffer.emplace_back(start_address, accumulated_size);
for (auto& [address, size] : buffer) {
func(address, size);
}
}
private:
static constexpr size_t atomicity_bits = 5;
static constexpr size_t atomicity_size = 1ULL << atomicity_bits;
static constexpr size_t atomicity_size_mask = atomicity_size - 1;
static constexpr size_t atomicity_mask = ~atomicity_size_mask;
GPUVAddr start_address{};
GPUVAddr last_collection{};
size_t accumulated_size{};
bool has_collected{};
std::vector<std::pair<VAddr, size_t>> buffer;
};
} // namespace VideoCommon

View File

@@ -50,38 +50,6 @@ protected:
Maxwell3D& maxwell3d;
};
class HLE_DrawArrays final : public HLEMacroImpl {
public:
explicit HLE_DrawArrays(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
maxwell3d.RefreshParameters();
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
maxwell3d.draw_manager->DrawArray(topology, parameters[1], parameters[2],
maxwell3d.regs.global_base_instance_index, 1);
}
};
class HLE_DrawIndexed final : public HLEMacroImpl {
public:
explicit HLE_DrawIndexed(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
maxwell3d.RefreshParameters();
maxwell3d.regs.index_buffer.start_addr_high = parameters[1];
maxwell3d.regs.index_buffer.start_addr_low = parameters[2];
maxwell3d.regs.index_buffer.format =
static_cast<Engines::Maxwell3D::Regs::IndexFormat>(parameters[3]);
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
maxwell3d.draw_manager->DrawIndex(topology, 0, parameters[4],
maxwell3d.regs.global_base_vertex_index,
maxwell3d.regs.global_base_instance_index, 1);
}
};
/*
* @note: these macros have two versions, a normal and extended version, with the extended version
* also assigning the base vertex/instance.
@@ -497,11 +465,6 @@ public:
} // Anonymous namespace
HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
builders.emplace(0xDD6A7FA92A7D2674ULL,
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
return std::make_unique<HLE_DrawArrays>(maxwell3d__);
}));
builders.emplace(0x0D61FC9FAAC9FCADULL,
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
@@ -512,11 +475,6 @@ HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
return std::make_unique<HLE_DrawArraysIndirect<true>>(maxwell3d__);
}));
builders.emplace(0x2DB33AADB741839CULL,
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
return std::make_unique<HLE_DrawIndexed>(maxwell3d__);
}));
builders.emplace(0x771BB18C62444DA0ULL,
std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
[](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {

View File

@@ -6,11 +6,13 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/settings.h"
#include "core/core.h"
#include "core/device_memory.h"
#include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/k_process.h"
#include "core/memory.h"
#include "video_core/invalidation_accumulator.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
@@ -26,7 +28,8 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
page_bits != big_page_bits ? page_bits : 0},
kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add(
1, std::memory_order_acq_rel)} {
1, std::memory_order_acq_rel)},
accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} {
address_space_size = 1ULL << address_space_bits;
page_size = 1ULL << page_bits;
page_mask = page_size - 1ULL;
@@ -43,6 +46,11 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
big_page_table_cpu.resize(big_page_table_size);
big_page_continous.resize(big_page_table_size / continous_bits, 0);
entries.resize(page_table_size / 32, 0);
if (!Settings::IsGPULevelExtreme() && Settings::IsFastmemEnabled()) {
fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer();
} else {
fastmem_arena = nullptr;
}
}
MemoryManager::~MemoryManager() = default;
@@ -185,15 +193,12 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
if (size == 0) {
return;
}
const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
GetSubmappedRangeImpl<false>(gpu_addr, size, page_stash);
for (const auto& [map_addr, map_size] : submapped_ranges) {
// Flush and invalidate through the GPU interface, to be asynchronous if possible.
const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map_addr);
ASSERT(cpu_addr);
rasterizer->UnmapMemory(*cpu_addr, map_size);
for (const auto& [map_addr, map_size] : page_stash) {
rasterizer->UnmapMemory(map_addr, map_size);
}
page_stash.clear();
BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
@@ -355,7 +360,7 @@ inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t si
}
}
template <bool is_safe>
template <bool is_safe, bool use_fastmem>
void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
[[maybe_unused]] VideoCommon::CacheType which) const {
auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index,
@@ -369,8 +374,12 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
if constexpr (is_safe) {
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
}
u8* physical = memory.GetPointer(cpu_addr_base);
std::memcpy(dest_buffer, physical, copy_amount);
if constexpr (use_fastmem) {
std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount);
} else {
u8* physical = memory.GetPointer(cpu_addr_base);
std::memcpy(dest_buffer, physical, copy_amount);
}
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
};
auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
@@ -379,11 +388,15 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
if constexpr (is_safe) {
rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
}
if (!IsBigPageContinous(page_index)) [[unlikely]] {
memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
if constexpr (use_fastmem) {
std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount);
} else {
u8* physical = memory.GetPointer(cpu_addr_base);
std::memcpy(dest_buffer, physical, copy_amount);
if (!IsBigPageContinous(page_index)) [[unlikely]] {
memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
} else {
u8* physical = memory.GetPointer(cpu_addr_base);
std::memcpy(dest_buffer, physical, copy_amount);
}
}
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
};
@@ -397,12 +410,20 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
VideoCommon::CacheType which) const {
ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size, which);
if (fastmem_arena) [[likely]] {
ReadBlockImpl<true, true>(gpu_src_addr, dest_buffer, size, which);
return;
}
ReadBlockImpl<true, false>(gpu_src_addr, dest_buffer, size, which);
}
void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
const std::size_t size) const {
ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
if (fastmem_arena) [[likely]] {
ReadBlockImpl<false, true>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
return;
}
ReadBlockImpl<false, false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
}
template <bool is_safe>
@@ -454,6 +475,12 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf
WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
}
void MemoryManager::WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer,
std::size_t size) {
WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
accumulator->Add(gpu_dest_addr, size);
}
void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size,
VideoCommon::CacheType which) const {
auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
@@ -663,7 +690,17 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
GPUVAddr gpu_addr, std::size_t size) const {
std::vector<std::pair<GPUVAddr, std::size_t>> result{};
std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
GetSubmappedRangeImpl<true>(gpu_addr, size, result);
return result;
}
template <bool is_gpu_address>
void MemoryManager::GetSubmappedRangeImpl(
GPUVAddr gpu_addr, std::size_t size,
std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
result) const {
std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>
last_segment{};
std::optional<VAddr> old_page_addr{};
const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index,
[[maybe_unused]] std::size_t offset,
@@ -685,8 +722,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
}
old_page_addr = {cpu_addr_base + copy_amount};
if (!last_segment) {
const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
last_segment = {new_base_addr, copy_amount};
if constexpr (is_gpu_address) {
const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
last_segment = {new_base_addr, copy_amount};
} else {
last_segment = {cpu_addr_base, copy_amount};
}
} else {
last_segment->second += copy_amount;
}
@@ -703,8 +744,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
}
old_page_addr = {cpu_addr_base + copy_amount};
if (!last_segment) {
const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
last_segment = {new_base_addr, copy_amount};
if constexpr (is_gpu_address) {
const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
last_segment = {new_base_addr, copy_amount};
} else {
last_segment = {cpu_addr_base, copy_amount};
}
} else {
last_segment->second += copy_amount;
}
@@ -715,7 +760,18 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
};
MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages);
split(0, 0, 0);
return result;
}
void MemoryManager::FlushCaching() {
if (!accumulator->AnyAccumulated()) {
return;
}
accumulator->Callback([this](GPUVAddr addr, size_t size) {
GetSubmappedRangeImpl<false>(addr, size, page_stash);
});
rasterizer->InnerInvalidation(page_stash);
page_stash.clear();
accumulator->Clear();
}
} // namespace Tegra

View File

@@ -19,6 +19,10 @@ namespace VideoCore {
class RasterizerInterface;
}
namespace VideoCommon {
class InvalidationAccumulator;
}
namespace Core {
class DeviceMemory;
namespace Memory {
@@ -80,6 +84,7 @@ public:
*/
void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
void WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
/**
* Checks if a gpu region can be simply read with a pointer.
@@ -129,12 +134,14 @@ public:
size_t GetMemoryLayoutSize(GPUVAddr gpu_addr,
size_t max_size = std::numeric_limits<size_t>::max()) const;
void FlushCaching();
private:
template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const;
template <bool is_safe>
template <bool is_safe, bool use_fastmem>
void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
VideoCommon::CacheType which) const;
@@ -154,6 +161,12 @@ private:
inline bool IsBigPageContinous(size_t big_page_index) const;
inline void SetBigPageContinous(size_t big_page_index, bool value);
template <bool is_gpu_address>
void GetSubmappedRangeImpl(
GPUVAddr gpu_addr, std::size_t size,
std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
result) const;
Core::System& system;
Core::Memory::Memory& memory;
Core::DeviceMemory& device_memory;
@@ -201,10 +214,13 @@ private:
Common::VirtualBuffer<u32> big_page_table_cpu;
std::vector<u64> big_page_continous;
std::vector<std::pair<VAddr, std::size_t>> page_stash{};
u8* fastmem_arena{};
constexpr static size_t continous_bits = 64;
const size_t unique_identifier;
std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator;
static std::atomic<size_t> unique_identifier_generator;
};

View File

@@ -6,6 +6,7 @@
#include <functional>
#include <optional>
#include <span>
#include <utility>
#include "common/common_types.h"
#include "common/polyfill_thread.h"
#include "video_core/cache_types.h"
@@ -95,6 +96,12 @@ public:
virtual void InvalidateRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
for (const auto& [cpu_addr, size] : sequences) {
InvalidateRegion(cpu_addr, size);
}
}
/// Notify rasterizer that any caches of the specified region are desync with guest
virtual void OnCPUWrite(VAddr addr, u64 size) = 0;

View File

@@ -160,6 +160,10 @@ public:
return device.CanReportMemoryUsage();
}
u32 GetStorageBufferAlignment() const {
return static_cast<u32>(device.GetShaderStorageBufferAlignment());
}
private:
static constexpr std::array PABO_LUT{
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,

View File

@@ -139,6 +139,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_load
void RasterizerOpenGL::Clear(u32 layer_count) {
MICROPROFILE_SCOPE(OpenGL_Clears);
gpu_memory->FlushCaching();
const auto& regs = maxwell3d->regs;
bool use_color{};
bool use_depth{};
@@ -207,6 +208,7 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
SCOPE_EXIT({ gpu.TickWork(); });
gpu_memory->FlushCaching();
query_cache.UpdateCounters();
GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
@@ -319,6 +321,7 @@ void RasterizerOpenGL::DrawIndirect() {
}
void RasterizerOpenGL::DispatchCompute() {
gpu_memory->FlushCaching();
ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
if (!pipeline) {
return;
@@ -526,6 +529,7 @@ void RasterizerOpenGL::TickFrame() {
}
bool RasterizerOpenGL::AccelerateConditionalRendering() {
gpu_memory->FlushCaching();
if (Settings::IsGPULevelHigh()) {
// Reimplement Host conditional rendering.
return false;

View File

@@ -236,6 +236,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.needs_demote_reorder = device.IsAmd(),
.support_snorm_render_buffer = false,
.support_viewport_index_layer = device.HasVertexViewportLayer(),
.min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
.support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
} {
if (use_asynchronous_shaders) {
workers = CreateWorkers();

View File

@@ -442,7 +442,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glBindTextureUnit(0, screen_info.display_texture);
const auto anti_aliasing = Settings::values.anti_aliasing.GetValue();
auto anti_aliasing = Settings::values.anti_aliasing.GetValue();
if (anti_aliasing > Settings::AntiAliasing::LastAA) {
LOG_ERROR(Render_OpenGL, "Invalid antialiasing option selected {}", anti_aliasing);
anti_aliasing = Settings::AntiAliasing::None;
Settings::values.anti_aliasing.SetValue(anti_aliasing);
}
if (anti_aliasing != Settings::AntiAliasing::None) {
glEnablei(GL_SCISSOR_TEST, 0);
auto viewport_width = screen_info.texture.width;

View File

@@ -60,24 +60,13 @@ std::string GetDriverVersion(const Device& device) {
return GetReadableVersion(version);
}
std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_extensions) {
std::sort(std::begin(available_extensions), std::end(available_extensions));
static constexpr std::size_t AverageExtensionSize = 64;
std::string separated_extensions;
separated_extensions.reserve(available_extensions.size() * AverageExtensionSize);
const auto end = std::end(available_extensions);
for (auto extension = std::begin(available_extensions); extension != end; ++extension) {
if (const bool is_last = extension + 1 == end; is_last) {
separated_extensions += *extension;
} else {
separated_extensions += fmt::format("{},", *extension);
}
}
return separated_extensions;
std::string BuildCommaSeparatedExtensions(
const std::set<std::string, std::less<>>& available_extensions) {
return fmt::format("{}", fmt::join(available_extensions, ","));
}
} // Anonymous namespace
Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
VkSurfaceKHR surface) {
const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
@@ -89,7 +78,6 @@ Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dl
const vk::PhysicalDevice physical_device(devices[device_index], dld);
return Device(*instance, physical_device, surface, dld);
}
} // Anonymous namespace
RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
Core::Frontend::EmuWindow& emu_window,
@@ -98,7 +86,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
: RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_),
cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()),
instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
true, Settings::values.renderer_debug.GetValue())),
Settings::values.renderer_debug.GetValue())),
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
surface(CreateSurface(instance, render_window)),
device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false),
@@ -109,6 +97,10 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
screen_info),
rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
state_tracker, scheduler) {
if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) {
turbo_mode.emplace(instance, dld);
scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); });
}
Report();
} catch (const vk::Exception& exception) {
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
@@ -116,6 +108,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
}
RendererVulkan::~RendererVulkan() {
scheduler.RegisterOnSubmit([] {});
void(device.GetLogical().WaitIdle());
}

View File

@@ -13,6 +13,7 @@
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/renderer_vulkan/vk_turbo_mode.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -31,6 +32,9 @@ class GPU;
namespace Vulkan {
Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
VkSurfaceKHR surface);
class RendererVulkan final : public VideoCore::RendererBase {
public:
explicit RendererVulkan(Core::TelemetrySession& telemtry_session,
@@ -74,6 +78,7 @@ private:
Swapchain swapchain;
BlitScreen blit_screen;
RasterizerVulkan rasterizer;
std::optional<TurboMode> turbo_mode;
};
} // namespace Vulkan

View File

@@ -330,12 +330,19 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
return device.CanReportMemoryUsage();
}
u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
return static_cast<u32>(device.GetStorageBufferAlignment());
}
void BufferCacheRuntime::Finish() {
scheduler.Finish();
}
void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) {
return;
}
static constexpr VkMemoryBarrier READ_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
@@ -394,6 +401,9 @@ void BufferCacheRuntime::PostCopyBarrier() {
}
void BufferCacheRuntime::ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value) {
if (dest_buffer == VK_NULL_HANDLE) {
return;
}
static constexpr VkMemoryBarrier READ_BARRIER{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
@@ -473,6 +483,11 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset
cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride);
});
} else {
if (!device.HasNullDescriptor() && buffer == VK_NULL_HANDLE) {
ReserveNullBuffer();
buffer = *null_buffer;
offset = 0;
}
scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) {
cmdbuf.BindVertexBuffer(index, buffer, offset);
});

View File

@@ -73,6 +73,8 @@ public:
bool CanReportMemoryUsage() const;
u32 GetStorageBufferAlignment() const;
[[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);

View File

@@ -331,6 +331,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
.need_declared_frag_colors = false,
.has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS,
.has_broken_spirv_position_input = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
.has_broken_unsigned_image_offsets = false,
.has_broken_signed_operations = false,
.has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY,
@@ -343,6 +344,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE,
.support_snorm_render_buffer = true,
.support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
.min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()),
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
};
if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) {
@@ -790,7 +793,8 @@ vk::PipelineCache PipelineCache::LoadVulkanPipelineCache(const std::filesystem::
return create_pipeline_cache(0, nullptr);
}
const size_t cache_size = static_cast<size_t>(end) - magic_number.size();
static constexpr size_t header_size = magic_number.size() + sizeof(cache_version);
const size_t cache_size = static_cast<size_t>(end) - header_size;
std::vector<char> cache_data(cache_size);
file.read(cache_data.data(), cache_size);

View File

@@ -186,6 +186,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
SCOPE_EXIT({ gpu.TickWork(); });
FlushWork();
gpu_memory->FlushCaching();
query_cache.UpdateCounters();
@@ -269,6 +270,7 @@ void RasterizerVulkan::Clear(u32 layer_count) {
MICROPROFILE_SCOPE(Vulkan_Clearing);
FlushWork();
gpu_memory->FlushCaching();
query_cache.UpdateCounters();
@@ -393,6 +395,7 @@ void RasterizerVulkan::Clear(u32 layer_count) {
void RasterizerVulkan::DispatchCompute() {
FlushWork();
gpu_memory->FlushCaching();
ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()};
if (!pipeline) {
@@ -481,6 +484,27 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
}
}
void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
{
std::scoped_lock lock{texture_cache.mutex};
for (const auto& [addr, size] : sequences) {
texture_cache.WriteMemory(addr, size);
}
}
{
std::scoped_lock lock{buffer_cache.mutex};
for (const auto& [addr, size] : sequences) {
buffer_cache.WriteMemory(addr, size);
}
}
{
for (const auto& [addr, size] : sequences) {
query_cache.InvalidateRegion(addr, size);
pipeline_cache.InvalidateRegion(addr, size);
}
}
}
void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
@@ -605,6 +629,7 @@ void RasterizerVulkan::TickFrame() {
}
bool RasterizerVulkan::AccelerateConditionalRendering() {
gpu_memory->FlushCaching();
if (Settings::IsGPULevelHigh()) {
// TODO(Blinkhawk): Reimplement Host conditional rendering.
return false;

View File

@@ -79,6 +79,7 @@ public:
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void InvalidateRegion(VAddr addr, u64 size,
VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;

View File

@@ -213,6 +213,11 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s
.signalSemaphoreCount = num_signal_semaphores,
.pSignalSemaphores = signal_semaphores.data(),
};
if (on_submit) {
on_submit();
}
switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
case VK_SUCCESS:
break;

View File

@@ -5,6 +5,7 @@
#include <condition_variable>
#include <cstddef>
#include <functional>
#include <memory>
#include <thread>
#include <utility>
@@ -66,6 +67,11 @@ public:
query_cache = &query_cache_;
}
// Registers a callback to perform on queue submission.
void RegisterOnSubmit(std::function<void()>&& func) {
on_submit = std::move(func);
}
/// Send work to a separate thread.
template <typename T>
void Record(T&& command) {
@@ -216,6 +222,7 @@ private:
vk::CommandBuffer current_cmdbuf;
std::unique_ptr<CommandChunk> chunk;
std::function<void()> on_submit;
State state;

View File

@@ -0,0 +1,222 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/literals.h"
#include "video_core/host_shaders/vulkan_turbo_mode_comp_spv.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_turbo_mode.h"
#include "video_core/vulkan_common/vulkan_device.h"
namespace Vulkan {
using namespace Common::Literals;
TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld)
: m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} {
{
std::scoped_lock lk{m_submission_lock};
m_submission_time = std::chrono::steady_clock::now();
}
m_thread = std::jthread([&](auto stop_token) { Run(stop_token); });
}
TurboMode::~TurboMode() = default;
void TurboMode::QueueSubmitted() {
std::scoped_lock lk{m_submission_lock};
m_submission_time = std::chrono::steady_clock::now();
m_submission_cv.notify_one();
}
void TurboMode::Run(std::stop_token stop_token) {
auto& dld = m_device.GetLogical();
// Allocate buffer. 2MiB should be sufficient.
auto buffer = dld.CreateBuffer(VkBufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = 2_MiB,
.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
// Commit some device local memory for the buffer.
auto commit = m_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
// Create the descriptor pool to contain our descriptor.
constexpr VkDescriptorPoolSize pool_size{
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
};
auto descriptor_pool = dld.CreateDescriptorPool(VkDescriptorPoolCreateInfo{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = nullptr,
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
.maxSets = 1,
.poolSizeCount = 1,
.pPoolSizes = &pool_size,
});
// Create the descriptor set layout from the pool.
constexpr VkDescriptorSetLayoutBinding layout_binding{
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
};
auto descriptor_set_layout = dld.CreateDescriptorSetLayout(VkDescriptorSetLayoutCreateInfo{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.bindingCount = 1,
.pBindings = &layout_binding,
});
// Actually create the descriptor set.
auto descriptor_set = descriptor_pool.Allocate(VkDescriptorSetAllocateInfo{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.pNext = nullptr,
.descriptorPool = *descriptor_pool,
.descriptorSetCount = 1,
.pSetLayouts = descriptor_set_layout.address(),
});
// Create the shader.
auto shader = BuildShader(m_device, VULKAN_TURBO_MODE_COMP_SPV);
// Create the pipeline layout.
auto pipeline_layout = dld.CreatePipelineLayout(VkPipelineLayoutCreateInfo{
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.setLayoutCount = 1,
.pSetLayouts = descriptor_set_layout.address(),
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
});
// Actually create the pipeline.
const VkPipelineShaderStageCreateInfo shader_stage{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = *shader,
.pName = "main",
.pSpecializationInfo = nullptr,
};
auto pipeline = dld.CreateComputePipeline(VkComputePipelineCreateInfo{
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage = shader_stage,
.layout = *pipeline_layout,
.basePipelineHandle = VK_NULL_HANDLE,
.basePipelineIndex = 0,
});
// Create a fence to wait on.
auto fence = dld.CreateFence(VkFenceCreateInfo{
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
});
// Create a command pool to allocate a command buffer from.
auto command_pool = dld.CreateCommandPool(VkCommandPoolCreateInfo{
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.pNext = nullptr,
.flags =
VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
.queueFamilyIndex = m_device.GetGraphicsFamily(),
});
// Create a single command buffer.
auto cmdbufs = command_pool.Allocate(1, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
auto cmdbuf = vk::CommandBuffer{cmdbufs[0], m_device.GetDispatchLoader()};
while (!stop_token.stop_requested()) {
// Reset the fence.
fence.Reset();
// Update descriptor set.
const VkDescriptorBufferInfo buffer_info{
.buffer = *buffer,
.offset = 0,
.range = VK_WHOLE_SIZE,
};
const VkWriteDescriptorSet buffer_write{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.pNext = nullptr,
.dstSet = descriptor_set[0],
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.pImageInfo = nullptr,
.pBufferInfo = &buffer_info,
.pTexelBufferView = nullptr,
};
dld.UpdateDescriptorSets(std::array{buffer_write}, {});
// Set up the command buffer.
cmdbuf.Begin(VkCommandBufferBeginInfo{
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.pNext = nullptr,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
.pInheritanceInfo = nullptr,
});
// Clear the buffer.
cmdbuf.FillBuffer(*buffer, 0, VK_WHOLE_SIZE, 0);
// Bind descriptor set.
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
descriptor_set, {});
// Bind the pipeline.
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
// Dispatch.
cmdbuf.Dispatch(64, 64, 1);
// Finish.
cmdbuf.End();
const VkSubmitInfo submit_info{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = nullptr,
.waitSemaphoreCount = 0,
.pWaitSemaphores = nullptr,
.pWaitDstStageMask = nullptr,
.commandBufferCount = 1,
.pCommandBuffers = cmdbuf.address(),
.signalSemaphoreCount = 0,
.pSignalSemaphores = nullptr,
};
m_device.GetGraphicsQueue().Submit(std::array{submit_info}, *fence);
// Wait for completion.
fence.Wait();
// Wait for the next graphics queue submission if necessary.
std::unique_lock lk{m_submission_lock};
Common::CondvarWait(m_submission_cv, lk, stop_token, [this] {
return (std::chrono::steady_clock::now() - m_submission_time) <=
std::chrono::milliseconds{100};
});
}
}
} // namespace Vulkan

View File

@@ -0,0 +1,35 @@
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <chrono>
#include <mutex>
#include "common/polyfill_thread.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class TurboMode {
public:
explicit TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld);
~TurboMode();
void QueueSubmitted();
private:
void Run(std::stop_token stop_token);
Device m_device;
MemoryAllocator m_allocator;
std::mutex m_submission_lock;
std::condition_variable_any m_submission_cv;
std::chrono::time_point<std::chrono::steady_clock> m_submission_time{};
std::jthread m_thread;
};
} // namespace Vulkan

File diff suppressed because it is too large Load Diff

View File

@@ -3,6 +3,7 @@
#pragma once
#include <set>
#include <span>
#include <string>
#include <unordered_map>
@@ -11,6 +12,156 @@
#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
// Define all features which may be used by the implementation here.
// Vulkan version in the macro describes the minimum version required for feature availability.
// If the Vulkan version is lower than the required version, the named extension is required.
#define FOR_EACH_VK_FEATURE_1_1(FEATURE) \
FEATURE(EXT, SubgroupSizeControl, SUBGROUP_SIZE_CONTROL, subgroup_size_control) \
FEATURE(KHR, 16BitStorage, 16BIT_STORAGE, bit16_storage) \
FEATURE(KHR, ShaderAtomicInt64, SHADER_ATOMIC_INT64, shader_atomic_int64) \
FEATURE(KHR, ShaderDrawParameters, SHADER_DRAW_PARAMETERS, shader_draw_parameters) \
FEATURE(KHR, ShaderFloat16Int8, SHADER_FLOAT16_INT8, shader_float16_int8) \
FEATURE(KHR, UniformBufferStandardLayout, UNIFORM_BUFFER_STANDARD_LAYOUT, \
uniform_buffer_standard_layout) \
FEATURE(KHR, VariablePointer, VARIABLE_POINTERS, variable_pointer)
#define FOR_EACH_VK_FEATURE_1_2(FEATURE) \
FEATURE(EXT, HostQueryReset, HOST_QUERY_RESET, host_query_reset) \
FEATURE(KHR, 8BitStorage, 8BIT_STORAGE, bit8_storage) \
FEATURE(KHR, TimelineSemaphore, TIMELINE_SEMAPHORE, timeline_semaphore)
#define FOR_EACH_VK_FEATURE_1_3(FEATURE) \
FEATURE(EXT, ShaderDemoteToHelperInvocation, SHADER_DEMOTE_TO_HELPER_INVOCATION, \
shader_demote_to_helper_invocation)
// Define all features which may be used by the implementation and require an extension here.
#define FOR_EACH_VK_FEATURE_EXT(FEATURE) \
FEATURE(EXT, CustomBorderColor, CUSTOM_BORDER_COLOR, custom_border_color) \
FEATURE(EXT, DepthClipControl, DEPTH_CLIP_CONTROL, depth_clip_control) \
FEATURE(EXT, ExtendedDynamicState, EXTENDED_DYNAMIC_STATE, extended_dynamic_state) \
FEATURE(EXT, ExtendedDynamicState2, EXTENDED_DYNAMIC_STATE_2, extended_dynamic_state2) \
FEATURE(EXT, ExtendedDynamicState3, EXTENDED_DYNAMIC_STATE_3, extended_dynamic_state3) \
FEATURE(EXT, IndexTypeUint8, INDEX_TYPE_UINT8, index_type_uint8) \
FEATURE(EXT, LineRasterization, LINE_RASTERIZATION, line_rasterization) \
FEATURE(EXT, PrimitiveTopologyListRestart, PRIMITIVE_TOPOLOGY_LIST_RESTART, \
primitive_topology_list_restart) \
FEATURE(EXT, ProvokingVertex, PROVOKING_VERTEX, provoking_vertex) \
FEATURE(EXT, Robustness2, ROBUSTNESS_2, robustness2) \
FEATURE(EXT, TransformFeedback, TRANSFORM_FEEDBACK, transform_feedback) \
FEATURE(EXT, VertexInputDynamicState, VERTEX_INPUT_DYNAMIC_STATE, vertex_input_dynamic_state) \
FEATURE(KHR, PipelineExecutableProperties, PIPELINE_EXECUTABLE_PROPERTIES, \
pipeline_executable_properties) \
FEATURE(KHR, WorkgroupMemoryExplicitLayout, WORKGROUP_MEMORY_EXPLICIT_LAYOUT, \
workgroup_memory_explicit_layout)
// Define miscellaneous extensions which may be used by the implementation here.
#define FOR_EACH_VK_EXTENSION(EXTENSION) \
EXTENSION(EXT, CONSERVATIVE_RASTERIZATION, conservative_rasterization) \
EXTENSION(EXT, DEPTH_RANGE_UNRESTRICTED, depth_range_unrestricted) \
EXTENSION(EXT, MEMORY_BUDGET, memory_budget) \
EXTENSION(EXT, ROBUSTNESS_2, robustness_2) \
EXTENSION(EXT, SAMPLER_FILTER_MINMAX, sampler_filter_minmax) \
EXTENSION(EXT, SHADER_STENCIL_EXPORT, shader_stencil_export) \
EXTENSION(EXT, SHADER_VIEWPORT_INDEX_LAYER, shader_viewport_index_layer) \
EXTENSION(EXT, TOOLING_INFO, tooling_info) \
EXTENSION(EXT, VERTEX_ATTRIBUTE_DIVISOR, vertex_attribute_divisor) \
EXTENSION(KHR, DRAW_INDIRECT_COUNT, draw_indirect_count) \
EXTENSION(KHR, DRIVER_PROPERTIES, driver_properties) \
EXTENSION(KHR, EXTERNAL_MEMORY_FD, external_memory_fd) \
EXTENSION(KHR, PUSH_DESCRIPTOR, push_descriptor) \
EXTENSION(KHR, SAMPLER_MIRROR_CLAMP_TO_EDGE, sampler_mirror_clamp_to_edge) \
EXTENSION(KHR, SHADER_FLOAT_CONTROLS, shader_float_controls) \
EXTENSION(KHR, SPIRV_1_4, spirv_1_4) \
EXTENSION(KHR, SWAPCHAIN, swapchain) \
EXTENSION(KHR, SWAPCHAIN_MUTABLE_FORMAT, swapchain_mutable_format) \
EXTENSION(NV, DEVICE_DIAGNOSTICS_CONFIG, device_diagnostics_config) \
EXTENSION(NV, GEOMETRY_SHADER_PASSTHROUGH, geometry_shader_passthrough) \
EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2) \
EXTENSION(NV, VIEWPORT_SWIZZLE, viewport_swizzle)
#define FOR_EACH_VK_EXTENSION_WIN32(EXTENSION) \
EXTENSION(KHR, EXTERNAL_MEMORY_WIN32, external_memory_win32)
// Define extensions which must be supported.
#define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) \
EXTENSION_NAME(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME) \
EXTENSION_NAME(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME) \
EXTENSION_NAME(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME)
#define FOR_EACH_VK_MANDATORY_EXTENSION_GENERIC(EXTENSION_NAME) \
EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME)
#define FOR_EACH_VK_MANDATORY_EXTENSION_WIN32(EXTENSION_NAME) \
EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME)
// Define extensions where the absence of the extension may result in a degraded experience.
#define FOR_EACH_VK_RECOMMENDED_EXTENSION(EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME) \
EXTENSION_NAME(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME) \
EXTENSION_NAME(VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME) \
EXTENSION_NAME(VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME) \
EXTENSION_NAME(VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME)
// Define features which must be supported.
#define FOR_EACH_VK_MANDATORY_FEATURE(FEATURE_NAME) \
FEATURE_NAME(bit16_storage, storageBuffer16BitAccess) \
FEATURE_NAME(bit16_storage, uniformAndStorageBuffer16BitAccess) \
FEATURE_NAME(bit8_storage, storageBuffer8BitAccess) \
FEATURE_NAME(bit8_storage, uniformAndStorageBuffer8BitAccess) \
FEATURE_NAME(features, depthBiasClamp) \
FEATURE_NAME(features, depthClamp) \
FEATURE_NAME(features, drawIndirectFirstInstance) \
FEATURE_NAME(features, dualSrcBlend) \
FEATURE_NAME(features, fillModeNonSolid) \
FEATURE_NAME(features, fragmentStoresAndAtomics) \
FEATURE_NAME(features, geometryShader) \
FEATURE_NAME(features, imageCubeArray) \
FEATURE_NAME(features, independentBlend) \
FEATURE_NAME(features, largePoints) \
FEATURE_NAME(features, logicOp) \
FEATURE_NAME(features, multiDrawIndirect) \
FEATURE_NAME(features, multiViewport) \
FEATURE_NAME(features, occlusionQueryPrecise) \
FEATURE_NAME(features, robustBufferAccess) \
FEATURE_NAME(features, samplerAnisotropy) \
FEATURE_NAME(features, sampleRateShading) \
FEATURE_NAME(features, shaderClipDistance) \
FEATURE_NAME(features, shaderCullDistance) \
FEATURE_NAME(features, shaderImageGatherExtended) \
FEATURE_NAME(features, shaderStorageImageWriteWithoutFormat) \
FEATURE_NAME(features, tessellationShader) \
FEATURE_NAME(features, vertexPipelineStoresAndAtomics) \
FEATURE_NAME(features, wideLines) \
FEATURE_NAME(host_query_reset, hostQueryReset) \
FEATURE_NAME(robustness2, nullDescriptor) \
FEATURE_NAME(robustness2, robustBufferAccess2) \
FEATURE_NAME(robustness2, robustImageAccess2) \
FEATURE_NAME(shader_demote_to_helper_invocation, shaderDemoteToHelperInvocation) \
FEATURE_NAME(shader_draw_parameters, shaderDrawParameters) \
FEATURE_NAME(timeline_semaphore, timelineSemaphore) \
FEATURE_NAME(variable_pointer, variablePointers) \
FEATURE_NAME(variable_pointer, variablePointersStorageBuffer)
// Define features where the absence of the feature may result in a degraded experience.
#define FOR_EACH_VK_RECOMMENDED_FEATURE(FEATURE_NAME) \
FEATURE_NAME(custom_border_color, customBorderColors) \
FEATURE_NAME(extended_dynamic_state, extendedDynamicState) \
FEATURE_NAME(index_type_uint8, indexTypeUint8) \
FEATURE_NAME(primitive_topology_list_restart, primitiveTopologyListRestart) \
FEATURE_NAME(provoking_vertex, provokingVertexLast) \
FEATURE_NAME(shader_float16_int8, shaderFloat16) \
FEATURE_NAME(shader_float16_int8, shaderInt8) \
FEATURE_NAME(transform_feedback, transformFeedback) \
FEATURE_NAME(uniform_buffer_standard_layout, uniformBufferStandardLayout) \
FEATURE_NAME(vertex_input_dynamic_state, vertexInputDynamicState)
namespace Vulkan {
class NsightAftermathTracker;
@@ -88,67 +239,69 @@ public:
/// Returns the current Vulkan API version provided in Vulkan-formatted version numbers.
u32 ApiVersion() const {
return properties.apiVersion;
return properties.properties.apiVersion;
}
/// Returns the current driver version provided in Vulkan-formatted version numbers.
u32 GetDriverVersion() const {
return properties.driverVersion;
return properties.properties.driverVersion;
}
/// Returns the device name.
std::string_view GetModelName() const {
return properties.deviceName;
return properties.properties.deviceName;
}
/// Returns the driver ID.
VkDriverIdKHR GetDriverID() const {
return driver_id;
return properties.driver.driverID;
}
bool ShouldBoostClocks() const;
/// Returns uniform buffer alignment requeriment.
VkDeviceSize GetUniformBufferAlignment() const {
return properties.limits.minUniformBufferOffsetAlignment;
return properties.properties.limits.minUniformBufferOffsetAlignment;
}
/// Returns storage alignment requeriment.
VkDeviceSize GetStorageBufferAlignment() const {
return properties.limits.minStorageBufferOffsetAlignment;
return properties.properties.limits.minStorageBufferOffsetAlignment;
}
/// Returns the maximum range for storage buffers.
VkDeviceSize GetMaxStorageBufferRange() const {
return properties.limits.maxStorageBufferRange;
return properties.properties.limits.maxStorageBufferRange;
}
/// Returns the maximum size for push constants.
VkDeviceSize GetMaxPushConstantsSize() const {
return properties.limits.maxPushConstantsSize;
return properties.properties.limits.maxPushConstantsSize;
}
/// Returns the maximum size for shared memory.
u32 GetMaxComputeSharedMemorySize() const {
return properties.limits.maxComputeSharedMemorySize;
return properties.properties.limits.maxComputeSharedMemorySize;
}
/// Returns float control properties of the device.
const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const {
return float_controls;
return properties.float_controls;
}
/// Returns true if ASTC is natively supported.
bool IsOptimalAstcSupported() const {
return is_optimal_astc_supported;
return features.features.textureCompressionASTC_LDR;
}
/// Returns true if the device supports float16 natively.
bool IsFloat16Supported() const {
return is_float16_supported;
return features.shader_float16_int8.shaderFloat16;
}
/// Returns true if the device supports int8 natively.
bool IsInt8Supported() const {
return is_int8_supported;
return features.shader_float16_int8.shaderInt8;
}
/// Returns true if the device warp size can potentially be bigger than guest's warp size.
@@ -158,32 +311,32 @@ public:
/// Returns true if the device can be forced to use the guest warp size.
bool IsGuestWarpSizeSupported(VkShaderStageFlagBits stage) const {
return guest_warp_stages & stage;
return properties.subgroup_size_control.requiredSubgroupSizeStages & stage;
}
/// Returns the maximum number of push descriptors.
u32 MaxPushDescriptors() const {
return max_push_descriptors;
return properties.push_descriptor.maxPushDescriptors;
}
/// Returns true if formatless image load is supported.
bool IsFormatlessImageLoadSupported() const {
return is_formatless_image_load_supported;
return features.features.shaderStorageImageReadWithoutFormat;
}
/// Returns true if shader int64 is supported.
bool IsShaderInt64Supported() const {
return is_shader_int64_supported;
return features.features.shaderInt64;
}
/// Returns true if shader int16 is supported.
bool IsShaderInt16Supported() const {
return is_shader_int16_supported;
return features.features.shaderInt16;
}
// Returns true if depth bounds is supported.
bool IsDepthBoundsSupported() const {
return is_depth_bounds_supported;
return features.features.depthBounds;
}
/// Returns true when blitting from and to depth stencil images is supported.
@@ -193,151 +346,151 @@ public:
/// Returns true if the device supports VK_NV_viewport_swizzle.
bool IsNvViewportSwizzleSupported() const {
return nv_viewport_swizzle;
return extensions.viewport_swizzle;
}
/// Returns true if the device supports VK_NV_viewport_array2.
bool IsNvViewportArray2Supported() const {
return nv_viewport_array2;
return extensions.viewport_array2;
}
/// Returns true if the device supports VK_NV_geometry_shader_passthrough.
bool IsNvGeometryShaderPassthroughSupported() const {
return nv_geometry_shader_passthrough;
return extensions.geometry_shader_passthrough;
}
/// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout.
bool IsKhrUniformBufferStandardLayoutSupported() const {
return khr_uniform_buffer_standard_layout;
return extensions.uniform_buffer_standard_layout;
}
/// Returns true if the device supports VK_KHR_push_descriptor.
bool IsKhrPushDescriptorSupported() const {
return khr_push_descriptor;
return extensions.push_descriptor;
}
/// Returns true if VK_KHR_pipeline_executable_properties is enabled.
bool IsKhrPipelineExecutablePropertiesEnabled() const {
return khr_pipeline_executable_properties;
return extensions.pipeline_executable_properties;
}
/// Returns true if VK_KHR_swapchain_mutable_format is enabled.
bool IsKhrSwapchainMutableFormatEnabled() const {
return khr_swapchain_mutable_format;
return extensions.swapchain_mutable_format;
}
/// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const {
return khr_workgroup_memory_explicit_layout;
return extensions.workgroup_memory_explicit_layout;
}
/// Returns true if the device supports VK_EXT_primitive_topology_list_restart.
bool IsTopologyListPrimitiveRestartSupported() const {
return is_topology_list_restart_supported;
return features.primitive_topology_list_restart.primitiveTopologyListRestart;
}
/// Returns true if the device supports VK_EXT_primitive_topology_list_restart.
bool IsPatchListPrimitiveRestartSupported() const {
return is_patch_list_restart_supported;
return features.primitive_topology_list_restart.primitiveTopologyPatchListRestart;
}
/// Returns true if the device supports VK_EXT_index_type_uint8.
bool IsExtIndexTypeUint8Supported() const {
return ext_index_type_uint8;
return extensions.index_type_uint8;
}
/// Returns true if the device supports VK_EXT_sampler_filter_minmax.
bool IsExtSamplerFilterMinmaxSupported() const {
return ext_sampler_filter_minmax;
return extensions.sampler_filter_minmax;
}
/// Returns true if the device supports VK_EXT_depth_range_unrestricted.
bool IsExtDepthRangeUnrestrictedSupported() const {
return ext_depth_range_unrestricted;
return extensions.depth_range_unrestricted;
}
/// Returns true if the device supports VK_EXT_depth_clip_control.
bool IsExtDepthClipControlSupported() const {
return ext_depth_clip_control;
return extensions.depth_clip_control;
}
/// Returns true if the device supports VK_EXT_shader_viewport_index_layer.
bool IsExtShaderViewportIndexLayerSupported() const {
return ext_shader_viewport_index_layer;
return extensions.shader_viewport_index_layer;
}
/// Returns true if the device supports VK_EXT_subgroup_size_control.
bool IsExtSubgroupSizeControlSupported() const {
return ext_subgroup_size_control;
return extensions.subgroup_size_control;
}
/// Returns true if the device supports VK_EXT_transform_feedback.
bool IsExtTransformFeedbackSupported() const {
return ext_transform_feedback;
return extensions.transform_feedback;
}
/// Returns true if the device supports VK_EXT_custom_border_color.
bool IsExtCustomBorderColorSupported() const {
return ext_custom_border_color;
return extensions.custom_border_color;
}
/// Returns true if the device supports VK_EXT_extended_dynamic_state.
bool IsExtExtendedDynamicStateSupported() const {
return ext_extended_dynamic_state;
return extensions.extended_dynamic_state;
}
/// Returns true if the device supports VK_EXT_extended_dynamic_state2.
bool IsExtExtendedDynamicState2Supported() const {
return ext_extended_dynamic_state_2;
return extensions.extended_dynamic_state2;
}
bool IsExtExtendedDynamicState2ExtrasSupported() const {
return ext_extended_dynamic_state_2_extra;
return features.extended_dynamic_state2.extendedDynamicState2LogicOp;
}
/// Returns true if the device supports VK_EXT_extended_dynamic_state3.
bool IsExtExtendedDynamicState3Supported() const {
return ext_extended_dynamic_state_3;
return extensions.extended_dynamic_state3;
}
/// Returns true if the device supports VK_EXT_extended_dynamic_state3.
bool IsExtExtendedDynamicState3BlendingSupported() const {
return ext_extended_dynamic_state_3_blend;
return dynamic_state3_blending;
}
/// Returns true if the device supports VK_EXT_extended_dynamic_state3.
bool IsExtExtendedDynamicState3EnablesSupported() const {
return ext_extended_dynamic_state_3_enables;
return dynamic_state3_enables;
}
/// Returns true if the device supports VK_EXT_line_rasterization.
bool IsExtLineRasterizationSupported() const {
return ext_line_rasterization;
return extensions.line_rasterization;
}
/// Returns true if the device supports VK_EXT_vertex_input_dynamic_state.
bool IsExtVertexInputDynamicStateSupported() const {
return ext_vertex_input_dynamic_state;
return extensions.vertex_input_dynamic_state;
}
/// Returns true if the device supports VK_EXT_shader_stencil_export.
bool IsExtShaderStencilExportSupported() const {
return ext_shader_stencil_export;
return extensions.shader_stencil_export;
}
/// Returns true if the device supports VK_EXT_conservative_rasterization.
bool IsExtConservativeRasterizationSupported() const {
return ext_conservative_rasterization;
return extensions.conservative_rasterization;
}
/// Returns true if the device supports VK_EXT_provoking_vertex.
bool IsExtProvokingVertexSupported() const {
return ext_provoking_vertex;
return extensions.provoking_vertex;
}
/// Returns true if the device supports VK_KHR_shader_atomic_int64.
bool IsExtShaderAtomicInt64Supported() const {
return ext_shader_atomic_int64;
return extensions.shader_atomic_int64;
}
/// Returns the minimum supported version of SPIR-V.
@@ -345,7 +498,7 @@ public:
if (instance_version >= VK_API_VERSION_1_3) {
return 0x00010600U;
}
if (khr_spirv_1_4) {
if (extensions.spirv_1_4) {
return 0x00010400U;
}
return 0x00010000U;
@@ -363,11 +516,11 @@ public:
/// Returns the vendor name reported from Vulkan.
std::string_view GetVendorName() const {
return vendor_name;
return properties.driver.driverName;
}
/// Returns the list of available extensions.
const std::vector<std::string>& GetAvailableExtensions() const {
const std::set<std::string, std::less<>>& GetAvailableExtensions() const {
return supported_extensions;
}
@@ -376,7 +529,7 @@ public:
}
bool CanReportMemoryUsage() const {
return ext_memory_budget;
return extensions.memory_budget;
}
u64 GetDeviceMemoryUsage() const;
@@ -397,33 +550,30 @@ public:
return must_emulate_bgr565;
}
bool HasNullDescriptor() const {
return features.robustness2.nullDescriptor;
}
u32 GetMaxVertexInputAttributes() const {
return max_vertex_input_attributes;
return properties.properties.limits.maxVertexInputAttributes;
}
u32 GetMaxVertexInputBindings() const {
return max_vertex_input_bindings;
return properties.properties.limits.maxVertexInputBindings;
}
private:
/// Checks if the physical device is suitable.
void CheckSuitability(bool requires_swapchain) const;
/// Checks if the physical device is suitable and configures the object state
/// with all necessary info about its properties.
bool GetSuitability(bool requires_swapchain);
/// Loads extensions into a vector and stores available ones in this object.
std::vector<const char*> LoadExtensions(bool requires_surface);
// Remove extensions which have incomplete feature support.
void RemoveUnsuitableExtensions();
void RemoveExtensionIfUnsuitable(bool is_suitable, const std::string& extension_name);
/// Sets up queue families.
void SetupFamilies(VkSurfaceKHR surface);
/// Sets up device features.
void SetupFeatures();
/// Sets up device properties.
void SetupProperties();
/// Collects telemetry information from the device.
void CollectTelemetryParameters();
/// Collects information about attached tools.
void CollectToolingInfo();
@@ -434,90 +584,93 @@ private:
std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
/// Returns true if ASTC textures are natively supported.
bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const;
bool ComputeIsOptimalAstcSupported() const;
/// Returns true if the device natively supports blitting depth stencil images.
bool TestDepthStencilBlits() const;
VkInstance instance; ///< Vulkan instance.
vk::DeviceDispatch dld; ///< Device function pointers.
vk::PhysicalDevice physical; ///< Physical device.
VkPhysicalDeviceProperties properties; ///< Device properties.
VkPhysicalDeviceFloatControlsPropertiesKHR float_controls{}; ///< Float control properties.
vk::Device logical; ///< Logical device.
vk::Queue graphics_queue; ///< Main graphics queue.
vk::Queue present_queue; ///< Main present queue.
u32 instance_version{}; ///< Vulkan onstance version.
u32 graphics_family{}; ///< Main graphics queue family index.
u32 present_family{}; ///< Main present queue family index.
VkDriverIdKHR driver_id{}; ///< Driver ID.
VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
u32 max_push_descriptors{}; ///< Maximum number of push descriptors
u32 sets_per_pool{}; ///< Sets per Description Pool
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
bool is_float16_supported{}; ///< Support for float16 arithmetic.
bool is_int8_supported{}; ///< Support for int8 arithmetic.
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
bool is_depth_bounds_supported{}; ///< Support for depth bounds.
bool is_shader_float64_supported{}; ///< Support for float64.
bool is_shader_int64_supported{}; ///< Support for int64.
bool is_shader_int16_supported{}; ///< Support for int16.
bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images.
bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.
bool is_topology_list_restart_supported{}; ///< Support for primitive restart with list
///< topologies.
bool is_patch_list_restart_supported{}; ///< Support for primitive restart with list patch.
bool is_integrated{}; ///< Is GPU an iGPU.
bool is_virtual{}; ///< Is GPU a virtual GPU.
bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device.
bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2.
bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough.
bool khr_draw_indirect_count{}; ///< Support for VK_KHR_draw_indirect_count.
bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts.
bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4.
bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts.
bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descritor.
bool khr_pipeline_executable_properties{}; ///< Support for executable properties.
bool khr_swapchain_mutable_format{}; ///< Support for VK_KHR_swapchain_mutable_format.
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
bool ext_depth_clip_control{}; ///< Support for VK_EXT_depth_clip_control
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control.
bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
bool ext_extended_dynamic_state_2{}; ///< Support for VK_EXT_extended_dynamic_state2.
bool ext_extended_dynamic_state_2_extra{}; ///< Support for VK_EXT_extended_dynamic_state2.
bool ext_extended_dynamic_state_3{}; ///< Support for VK_EXT_extended_dynamic_state3.
bool ext_extended_dynamic_state_3_blend{}; ///< Support for VK_EXT_extended_dynamic_state3.
bool ext_extended_dynamic_state_3_enables{}; ///< Support for VK_EXT_extended_dynamic_state3.
bool ext_line_rasterization{}; ///< Support for VK_EXT_line_rasterization.
bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state.
bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64.
bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization.
bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex.
bool ext_memory_budget{}; ///< Support for VK_EXT_memory_budget.
bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
bool has_broken_cube_compatibility{}; ///< Has broken cube compatiblity bit
bool has_renderdoc{}; ///< Has RenderDoc attached
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
bool supports_d24_depth{}; ///< Supports D24 depth buffers.
bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting.
bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format.
u32 max_vertex_input_attributes{}; ///< Max vertex input attributes in pipeline
u32 max_vertex_input_bindings{}; ///< Max vertex input buffers in pipeline
private:
VkInstance instance; ///< Vulkan instance.
vk::DeviceDispatch dld; ///< Device function pointers.
vk::PhysicalDevice physical; ///< Physical device.
vk::Device logical; ///< Logical device.
vk::Queue graphics_queue; ///< Main graphics queue.
vk::Queue present_queue; ///< Main present queue.
u32 instance_version{}; ///< Vulkan instance version.
u32 graphics_family{}; ///< Main graphics queue family index.
u32 present_family{}; ///< Main present queue family index.
struct Extensions {
#define EXTENSION(prefix, macro_name, var_name) bool var_name{};
#define FEATURE(prefix, struct_name, macro_name, var_name) bool var_name{};
FOR_EACH_VK_FEATURE_1_1(FEATURE);
FOR_EACH_VK_FEATURE_1_2(FEATURE);
FOR_EACH_VK_FEATURE_1_3(FEATURE);
FOR_EACH_VK_FEATURE_EXT(FEATURE);
FOR_EACH_VK_EXTENSION(EXTENSION);
FOR_EACH_VK_EXTENSION_WIN32(EXTENSION);
#undef EXTENSION
#undef FEATURE
};
struct Features {
#define FEATURE_CORE(prefix, struct_name, macro_name, var_name) \
VkPhysicalDevice##struct_name##Features var_name{};
#define FEATURE_EXT(prefix, struct_name, macro_name, var_name) \
VkPhysicalDevice##struct_name##Features##prefix var_name{};
FOR_EACH_VK_FEATURE_1_1(FEATURE_CORE);
FOR_EACH_VK_FEATURE_1_2(FEATURE_CORE);
FOR_EACH_VK_FEATURE_1_3(FEATURE_CORE);
FOR_EACH_VK_FEATURE_EXT(FEATURE_EXT);
#undef FEATURE_CORE
#undef FEATURE_EXT
VkPhysicalDeviceFeatures features{};
};
struct Properties {
VkPhysicalDeviceDriverProperties driver{};
VkPhysicalDeviceFloatControlsProperties float_controls{};
VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor{};
VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control{};
VkPhysicalDeviceTransformFeedbackPropertiesEXT transform_feedback{};
VkPhysicalDeviceProperties properties{};
};
Extensions extensions{};
Features features{};
Properties properties{};
VkPhysicalDeviceFeatures2 features2{};
VkPhysicalDeviceProperties2 properties2{};
// Misc features
bool is_optimal_astc_supported{}; ///< Support for all guest ASTC formats.
bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
bool is_integrated{}; ///< Is GPU an iGPU.
bool is_virtual{}; ///< Is GPU a virtual GPU.
bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device.
bool has_broken_cube_compatibility{}; ///< Has broken cube compatiblity bit
bool has_renderdoc{}; ///< Has RenderDoc attached
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
bool supports_d24_depth{}; ///< Supports D24 depth buffers.
bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting.
bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format.
bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3.
bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3.
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
u32 sets_per_pool{}; ///< Sets per Description Pool
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.
std::vector<std::string> supported_extensions; ///< Reported Vulkan extensions.
std::vector<size_t> valid_heap_memory; ///< Heaps used.
std::set<std::string, std::less<>> supported_extensions; ///< Reported Vulkan extensions.
std::set<std::string, std::less<>> loaded_extensions; ///< Loaded Vulkan extensions.
std::vector<size_t> valid_heap_memory; ///< Heaps used.
/// Format properties dictionary.
std::unordered_map<VkFormat, VkFormatProperties> format_properties;

View File

@@ -32,7 +32,7 @@
namespace Vulkan {
namespace {
[[nodiscard]] std::vector<const char*> RequiredExtensions(
Core::Frontend::WindowSystemType window_type, bool enable_debug_utils) {
Core::Frontend::WindowSystemType window_type, bool enable_validation) {
std::vector<const char*> extensions;
extensions.reserve(6);
switch (window_type) {
@@ -65,7 +65,7 @@ namespace {
if (window_type != Core::Frontend::WindowSystemType::Headless) {
extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
}
if (enable_debug_utils) {
if (enable_validation) {
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
@@ -95,9 +95,9 @@ namespace {
return true;
}
[[nodiscard]] std::vector<const char*> Layers(bool enable_layers) {
[[nodiscard]] std::vector<const char*> Layers(bool enable_validation) {
std::vector<const char*> layers;
if (enable_layers) {
if (enable_validation) {
layers.push_back("VK_LAYER_KHRONOS_validation");
}
return layers;
@@ -125,7 +125,7 @@ void RemoveUnavailableLayers(const vk::InstanceDispatch& dld, std::vector<const
vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceDispatch& dld,
u32 required_version, Core::Frontend::WindowSystemType window_type,
bool enable_debug_utils, bool enable_layers) {
bool enable_validation) {
if (!library.IsOpen()) {
LOG_ERROR(Render_Vulkan, "Vulkan library not available");
throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
@@ -138,11 +138,11 @@ vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceD
LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers");
throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
}
const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_debug_utils);
const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_validation);
if (!AreExtensionsSupported(dld, extensions)) {
throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
}
std::vector<const char*> layers = Layers(enable_layers);
std::vector<const char*> layers = Layers(enable_validation);
RemoveUnavailableLayers(dld, layers);
const u32 available_version = vk::AvailableVersion(dld);

View File

@@ -17,8 +17,7 @@ namespace Vulkan {
* @param dld Dispatch table to load function pointers into
* @param required_version Required Vulkan version (for example, VK_API_VERSION_1_1)
* @param window_type Window system type's enabled extension
* @param enable_debug_utils Whether to enable VK_EXT_debug_utils_extension_name or not
* @param enable_layers Whether to enable Vulkan validation layers or not
* @param enable_validation Whether to enable Vulkan validation layers or not
*
* @return A new Vulkan instance
* @throw vk::Exception on failure
@@ -26,6 +25,6 @@ namespace Vulkan {
[[nodiscard]] vk::Instance CreateInstance(
const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, u32 required_version,
Core::Frontend::WindowSystemType window_type = Core::Frontend::WindowSystemType::Headless,
bool enable_debug_utils = false, bool enable_layers = false);
bool enable_validation = false);
} // namespace Vulkan

View File

@@ -96,8 +96,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCmdDrawIndexed);
X(vkCmdDrawIndirect);
X(vkCmdDrawIndexedIndirect);
X(vkCmdDrawIndirectCountKHR);
X(vkCmdDrawIndexedIndirectCountKHR);
X(vkCmdDrawIndirectCount);
X(vkCmdDrawIndexedIndirectCount);
X(vkCmdEndQuery);
X(vkCmdEndRenderPass);
X(vkCmdEndTransformFeedbackEXT);
@@ -221,6 +221,12 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
if (!dld.vkResetQueryPool) {
Proc(dld.vkResetQueryPool, dld, "vkResetQueryPoolEXT", device);
}
// Support for draw indirect with count is optional in Vulkan 1.2
if (!dld.vkCmdDrawIndirectCount) {
Proc(dld.vkCmdDrawIndirectCount, dld, "vkCmdDrawIndirectCountKHR", device);
Proc(dld.vkCmdDrawIndexedIndirectCount, dld, "vkCmdDrawIndexedIndirectCountKHR", device);
}
#undef X
}

View File

@@ -215,8 +215,8 @@ struct DeviceDispatch : InstanceDispatch {
PFN_vkCmdDrawIndexed vkCmdDrawIndexed{};
PFN_vkCmdDrawIndirect vkCmdDrawIndirect{};
PFN_vkCmdDrawIndexedIndirect vkCmdDrawIndexedIndirect{};
PFN_vkCmdDrawIndirectCountKHR vkCmdDrawIndirectCountKHR{};
PFN_vkCmdDrawIndexedIndirectCountKHR vkCmdDrawIndexedIndirectCountKHR{};
PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{};
PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{};
PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
PFN_vkCmdEndQuery vkCmdEndQuery{};
PFN_vkCmdEndRenderPass vkCmdEndRenderPass{};
@@ -1065,15 +1065,15 @@ public:
void DrawIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset, VkBuffer count_buffer,
VkDeviceSize count_offset, u32 draw_count, u32 stride) const noexcept {
dld->vkCmdDrawIndirectCountKHR(handle, src_buffer, src_offset, count_buffer, count_offset,
draw_count, stride);
dld->vkCmdDrawIndirectCount(handle, src_buffer, src_offset, count_buffer, count_offset,
draw_count, stride);
}
void DrawIndexedIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset,
VkBuffer count_buffer, VkDeviceSize count_offset, u32 draw_count,
u32 stride) const noexcept {
dld->vkCmdDrawIndexedIndirectCountKHR(handle, src_buffer, src_offset, count_buffer,
count_offset, draw_count, stride);
dld->vkCmdDrawIndexedIndirectCount(handle, src_buffer, src_offset, count_buffer,
count_offset, draw_count, stride);
}
void ClearAttachments(Span<VkClearAttachment> attachments,

View File

@@ -34,6 +34,8 @@ SPDX-License-Identifier: GPL-2.0-or-later
<string></string>
<key>CSResourcesFileMapped</key>
<true/>
<key>LSApplicationCategoryType</key>
<string>public.app-category.games</string>
<key>LSRequiresCarbon</key>
<true/>
<key>NSHumanReadableCopyright</key>

View File

@@ -96,9 +96,9 @@ void EmuThread::run() {
m_is_running.store(false);
m_is_running.notify_all();
emit DebugModeEntered();
EmulationPaused(lk);
Common::CondvarWait(m_should_run_cv, lk, stop_token, [&] { return m_should_run; });
emit DebugModeLeft();
EmulationResumed(lk);
}
}
@@ -111,6 +111,21 @@ void EmuThread::run() {
#endif
}
// Unlock while emitting signals so that the main thread can
// continue pumping events.
void EmuThread::EmulationPaused(std::unique_lock<std::mutex>& lk) {
lk.unlock();
emit DebugModeEntered();
lk.lock();
}
void EmuThread::EmulationResumed(std::unique_lock<std::mutex>& lk) {
lk.unlock();
emit DebugModeLeft();
lk.lock();
}
#ifdef HAS_OPENGL
class OpenGLSharedContext : public Core::Frontend::GraphicsContext {
public:

View File

@@ -91,6 +91,10 @@ public:
m_stop_source.request_stop();
}
private:
void EmulationPaused(std::unique_lock<std::mutex>& lk);
void EmulationResumed(std::unique_lock<std::mutex>& lk);
private:
Core::System& m_system;

View File

@@ -690,6 +690,7 @@ void Config::ReadRendererValues() {
qt_config->beginGroup(QStringLiteral("Renderer"));
ReadGlobalSetting(Settings::values.renderer_backend);
ReadGlobalSetting(Settings::values.renderer_force_max_clock);
ReadGlobalSetting(Settings::values.vulkan_device);
ReadGlobalSetting(Settings::values.fullscreen_mode);
ReadGlobalSetting(Settings::values.aspect_ratio);
@@ -1306,6 +1307,9 @@ void Config::SaveRendererValues() {
static_cast<u32>(Settings::values.renderer_backend.GetValue(global)),
static_cast<u32>(Settings::values.renderer_backend.GetDefault()),
Settings::values.renderer_backend.UsingGlobal());
WriteSetting(QString::fromStdString(Settings::values.renderer_force_max_clock.GetLabel()),
static_cast<u32>(Settings::values.renderer_force_max_clock.GetValue(global)),
static_cast<u32>(Settings::values.renderer_force_max_clock.GetDefault()));
WriteGlobalSetting(Settings::values.vulkan_device);
WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()),
static_cast<u32>(Settings::values.fullscreen_mode.GetValue(global)),

View File

@@ -364,6 +364,11 @@
<string>1X (720p/1080p)</string>
</property>
</item>
<item>
<property name="text">
<string>1.5X (1080p/1620p) [EXPERIMENTAL]</string>
</property>
</item>
<item>
<property name="text">
<string>2X (1440p/2160p)</string>
@@ -389,6 +394,16 @@
<string>6X (4320p/6480p)</string>
</property>
</item>
<item>
<property name="text">
<string>7X (5040p/7560p)</string>
</property>
</item>
<item>
<property name="text">
<string>8X (5760p/8640p)</string>
</property>
</item>
</widget>
</item>
</layout>

View File

@@ -22,9 +22,11 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default;
void ConfigureGraphicsAdvanced::SetConfiguration() {
const bool runtime_lock = !system.IsPoweredOn();
ui->use_vsync->setEnabled(runtime_lock);
ui->renderer_force_max_clock->setEnabled(runtime_lock);
ui->use_asynchronous_shaders->setEnabled(runtime_lock);
ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue());
ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue());
ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue());
ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue());
@@ -43,6 +45,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
&Settings::values.max_anisotropy);
ConfigurationShared::SetHighlight(ui->label_gpu_accuracy,
!Settings::values.gpu_accuracy.UsingGlobal());
ConfigurationShared::SetHighlight(ui->renderer_force_max_clock,
!Settings::values.renderer_force_max_clock.UsingGlobal());
ConfigurationShared::SetHighlight(ui->af_label,
!Settings::values.max_anisotropy.UsingGlobal());
}
@@ -50,6 +54,9 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
void ConfigureGraphicsAdvanced::ApplyConfiguration() {
ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.renderer_force_max_clock,
ui->renderer_force_max_clock,
renderer_force_max_clock);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy,
ui->anisotropic_filtering_combobox);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync);
@@ -81,6 +88,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
// Disable if not global (only happens during game)
if (Settings::IsConfiguringGlobal()) {
ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal());
ui->renderer_force_max_clock->setEnabled(
Settings::values.renderer_force_max_clock.UsingGlobal());
ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal());
ui->use_asynchronous_shaders->setEnabled(
Settings::values.use_asynchronous_shaders.UsingGlobal());
@@ -95,6 +104,9 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
return;
}
ConfigurationShared::SetColoredTristate(ui->renderer_force_max_clock,
Settings::values.renderer_force_max_clock,
renderer_force_max_clock);
ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync);
ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders,
Settings::values.use_asynchronous_shaders,

View File

@@ -36,6 +36,7 @@ private:
std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui;
ConfigurationShared::CheckState renderer_force_max_clock;
ConfigurationShared::CheckState use_vsync;
ConfigurationShared::CheckState use_asynchronous_shaders;
ConfigurationShared::CheckState use_fast_gpu_time;

View File

@@ -69,6 +69,16 @@
</layout>
</widget>
</item>
<item>
<widget class="QCheckBox" name="renderer_force_max_clock">
<property name="toolTip">
<string>Runs work in the background while waiting for graphics commands to keep the GPU from lowering its clock speed.</string>
</property>
<property name="text">
<string>Force maximum clocks (Vulkan only)</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="use_vsync">
<property name="toolTip">

View File

@@ -1466,6 +1466,12 @@ void ConfigureInputPlayer::mousePressEvent(QMouseEvent* event) {
input_subsystem->GetMouse()->PressButton(0, 0, 0, 0, button);
}
void ConfigureInputPlayer::wheelEvent(QWheelEvent* event) {
const int x = event->angleDelta().x();
const int y = event->angleDelta().y();
input_subsystem->GetMouse()->MouseWheelChange(x, y);
}
void ConfigureInputPlayer::keyPressEvent(QKeyEvent* event) {
if (!input_setter || !event) {
return;

View File

@@ -116,6 +116,9 @@ private:
/// Handle mouse button press events.
void mousePressEvent(QMouseEvent* event) override;
/// Handle mouse wheel move events.
void wheelEvent(QWheelEvent* event) override;
/// Handle key press events.
void keyPressEvent(QKeyEvent* event) override;

View File

@@ -1839,9 +1839,11 @@ void GMainWindow::OnEmulationStopTimeExpired() {
void GMainWindow::OnEmulationStopped() {
shutdown_timer.stop();
emu_thread->disconnect();
emu_thread->wait();
emu_thread = nullptr;
if (emu_thread) {
emu_thread->disconnect();
emu_thread->wait();
emu_thread.reset();
}
if (shutdown_dialog) {
shutdown_dialog->deleteLater();
@@ -3029,6 +3031,8 @@ void GMainWindow::OnStopGame() {
if (OnShutdownBegin()) {
OnShutdownBeginDialog();
} else {
OnEmulationStopped();
}
}
@@ -3726,15 +3730,36 @@ void GMainWindow::UpdateWindowTitle(std::string_view title_name, std::string_vie
}
}
std::string GMainWindow::CreateTASFramesString(
std::array<size_t, InputCommon::TasInput::PLAYER_NUMBER> frames) const {
std::string string = "";
size_t maxPlayerIndex = 0;
for (size_t i = 0; i < frames.size(); i++) {
if (frames[i] != 0) {
if (maxPlayerIndex != 0)
string += ", ";
while (maxPlayerIndex++ != i)
string += "0, ";
string += std::to_string(frames[i]);
}
}
return string;
}
QString GMainWindow::GetTasStateDescription() const {
auto [tas_status, current_tas_frame, total_tas_frames] = input_subsystem->GetTas()->GetStatus();
std::string tas_frames_string = CreateTASFramesString(total_tas_frames);
switch (tas_status) {
case InputCommon::TasInput::TasState::Running:
return tr("TAS state: Running %1/%2").arg(current_tas_frame).arg(total_tas_frames);
return tr("TAS state: Running %1/%2")
.arg(current_tas_frame)
.arg(QString::fromStdString(tas_frames_string));
case InputCommon::TasInput::TasState::Recording:
return tr("TAS state: Recording %1").arg(total_tas_frames);
return tr("TAS state: Recording %1").arg(total_tas_frames[0]);
case InputCommon::TasInput::TasState::Stopped:
return tr("TAS state: Idle %1/%2").arg(current_tas_frame).arg(total_tas_frames);
return tr("TAS state: Idle %1/%2")
.arg(current_tas_frame)
.arg(QString::fromStdString(tas_frames_string));
default:
return tr("TAS State: Invalid");
}

View File

@@ -12,6 +12,7 @@
#include "common/announce_multiplayer_room.h"
#include "common/common_types.h"
#include "input_common/drivers/tas_input.h"
#include "yuzu/compatibility_list.h"
#include "yuzu/hotkeys.h"
@@ -266,6 +267,9 @@ private:
void changeEvent(QEvent* event) override;
void closeEvent(QCloseEvent* event) override;
std::string CreateTASFramesString(
std::array<size_t, InputCommon::TasInput::PLAYER_NUMBER> frames) const;
#ifdef __unix__
void SetupSigInterrupts();
static void HandleSigInterrupt(int);

View File

@@ -296,6 +296,7 @@ void Config::ReadValues() {
// Renderer
ReadSetting("Renderer", Settings::values.renderer_backend);
ReadSetting("Renderer", Settings::values.renderer_force_max_clock);
ReadSetting("Renderer", Settings::values.renderer_debug);
ReadSetting("Renderer", Settings::values.renderer_shader_feedback);
ReadSetting("Renderer", Settings::values.enable_nsight_aftermath);