Compare commits

..

76 Commits

Author SHA1 Message Date
Skyth
5f08e51727 Add credits for MME shadow RAM 2020-03-18 23:06:08 +03:00
Skyth
72c17f8de8 Move ShadowRamControl::Replay handler to MacroInterpreter::Send 2020-03-18 21:14:33 +03:00
Skyth
c8aa2fe56b maxwell_3d: Implement MME shadow RAM
Implementation was based on this Ryujinx PR: https://github.com/Ryujinx/Ryujinx/pull/987
2020-03-17 23:30:04 +03:00
bunnei
1c45c8086e Merge pull request #3498 from ReinUsesLisp/texel-fetch-glsl
gl_shader_decompiler: Add layer component to texelFetch
2020-03-17 10:53:38 -04:00
bunnei
e8ded20d24 Merge pull request #3521 from ReinUsesLisp/nsight-debug
renderer_opengl: Detect Nvidia Nsight as a debugging tool
2020-03-16 22:52:42 -04:00
ReinUsesLisp
53d673a7d3 renderer_opengl: Move some logic to an anonymous namespace 2020-03-16 04:03:34 -03:00
ReinUsesLisp
311d2fc768 renderer_opengl: Detect Nvidia Nsight as a debugging tool
Use getenv to detect Nsight.
2020-03-16 03:59:08 -03:00
Rodrigo Locatti
b16c8e0e8d Merge pull request #3515 from ReinUsesLisp/vertex-vk-assert
vk_rasterizer: Fix vertex range assert
2020-03-15 21:26:54 -03:00
Rodrigo Locatti
7cc46a6faa Merge pull request #3501 from ReinUsesLisp/rgba16-snorm
video_core: Implement RGBA16_SNORM
2020-03-15 21:24:53 -03:00
Rodrigo Locatti
ddafc99776 Merge pull request #3502 from namkazt/patch-3
shader_decode: Reimplement BFE instructions
2020-03-15 21:23:04 -03:00
Rodrigo Locatti
d64edf21bb Merge pull request #3503 from makigumo/patch-2
maxwell_to_vk: add vertex format eA2B10G10R10UnormPack32
2020-03-15 21:21:38 -03:00
Rodrigo Locatti
86b1f15d9a Merge pull request #3512 from bunnei/fix-renderdoc
renderer_opengl: Keep frames synchronized when using a GPU debugger.
2020-03-15 19:28:43 -03:00
Rodrigo Locatti
d91a880f11 Merge pull request #3516 from makigumo/patch-3
vk_shader_decompiler: fix linux build
2020-03-15 18:43:40 -03:00
makigumo
f91046bf8d vk_shader_decompiler: fix linux build 2020-03-15 18:00:14 +01:00
ReinUsesLisp
a7131af7d6 vk_rasterizer: Fix vertex range assert
End can be equal to start in CalculateVertexArraysSize. This is quite
common when the vertex size is zero.
2020-03-15 04:04:17 -03:00
bunnei
c5afe93dcc renderer_opengl: Keep presentation frames in lock-step when GPU debugging.
- Fixes renderdoc with OpenGL renderer.
2020-03-14 17:45:01 -04:00
bunnei
4373fa8042 gl_device: Add option to check GL_EXT_debug_tool. 2020-03-14 17:39:29 -04:00
bunnei
4dfd5c84ea Merge pull request #3508 from FernandoS27/page-table
PageTable: move backing addresses to a children class as the CPU page table does not need them.
2020-03-14 16:50:27 -04:00
Fernando Sahmkow
c51dbf8038 Merge pull request #3500 from ReinUsesLisp/incompatible-types
texture_cache: Report incompatible textures as black
2020-03-14 09:49:05 -04:00
Fernando Sahmkow
41905ee467 Merge pull request #3499 from ReinUsesLisp/depth-2d-array
texture_cache/surface_params: Force depth=1 on 2D textures
2020-03-14 09:48:39 -04:00
Fernando Sahmkow
35145bd529 Merge pull request #3490 from ReinUsesLisp/transform-feedbacks
video_core: Initial implementation of transform feedbacks
2020-03-14 09:48:15 -04:00
Fernando Sahmkow
27cbb75e7c PageTable: move backing addresses to a children class as the CPU page table does not need them.
This PR aims to reduce the memory usage in the CPU page table by moving
GPU specific parameters into a child class. This saves 1Gb of Memory for
most games.
2020-03-14 09:43:57 -04:00
Nguyen Dac Nam
3287b1247d clang-format 2020-03-14 10:07:40 +07:00
Nguyen Dac Nam
240d45830d nit 2020-03-14 09:57:24 +07:00
ReinUsesLisp
69c7a01f88 vk/gl_shader_decompiler: Silence assertion on compute 2020-03-13 18:33:05 -03:00
ReinUsesLisp
62560f1e63 vk_shader_decompiler: Fix default varying regression 2020-03-13 18:33:05 -03:00
ReinUsesLisp
afebdda203 maxwell_3d: Add padding words to XFB entries
Use INSERT_UNION_PADDING_WORDS instead of alignas to ensure a size
requirement.
2020-03-13 18:33:05 -03:00
ReinUsesLisp
4bc4851d45 gl_shader_decompiler: Fix implicit conversion errors 2020-03-13 18:33:05 -03:00
Rodrigo Locatti
47459f6a36 vk_shader_decompiler: Fix implicit type conversion
Co-Authored-By: Mat M. <mathew1800@gmail.com>
2020-03-13 18:33:05 -03:00
ReinUsesLisp
2fae1e6205 vk_rasterizer: Implement transform feedback binding zero 2020-03-13 18:33:05 -03:00
ReinUsesLisp
b67360c0f8 vk_shader_decompiler: Add XFB decorations to generic varyings 2020-03-13 18:33:05 -03:00
ReinUsesLisp
8d5bdcb17b vk_device: Enable VK_EXT_transform_feedback when available 2020-03-13 18:33:05 -03:00
ReinUsesLisp
c320702092 vk_device: Shrink formatless capability name size 2020-03-13 18:33:05 -03:00
ReinUsesLisp
ae6189d7c2 shader/transform_feedback: Expose buffer stride 2020-03-13 18:33:05 -03:00
ReinUsesLisp
7acebd7eb6 vk_shader_decompiler: Use registry for specialization 2020-03-13 18:33:05 -03:00
ReinUsesLisp
8e9f23f393 gl_rasterizer: Implement transform feedback bindings 2020-03-13 18:33:04 -03:00
ReinUsesLisp
4d711dface gl_shader_decompiler: Decorate output attributes with XFB layout
We sometimes have to slice attributes in different parts. This is needed
for example in instances where the game feedbacks 3 components but
writes 4 from the shader (something that is possible with
GL_NV_transform_feedback).
2020-03-13 18:33:04 -03:00
ReinUsesLisp
3dcaa84ba4 shader/transform_feedback: Add host API friendly TFB builder 2020-03-13 18:33:04 -03:00
Fernando Sahmkow
666d431ad8 Merge pull request #3473 from ReinUsesLisp/shader-purge
gl_shader_cache: Rework shader cache and store texture arrays
2020-03-13 16:26:24 -04:00
Rodrigo Locatti
244fe13219 Merge branch 'master' into shader-purge 2020-03-13 16:44:06 -03:00
bunnei
b30b1f741d Merge pull request #3491 from ReinUsesLisp/polygon-modes
gl_rasterizer: Implement polygon modes and fill rectangles
2020-03-13 10:08:57 -04:00
makigumo
753bc2026f fix formatting 2020-03-13 11:37:24 +01:00
makigumo
54681909be maxwell_to_vk: add vertex format eA2B10G10R10UnormPack32 2020-03-13 11:26:13 +01:00
Nguyen Dac Nam
00607fe1e0 clang-format 2020-03-13 15:38:57 +07:00
Nguyen Dac Nam
325977c0c6 Apply suggestions from code review
Co-Authored-By: Mat M. <mathew1800@gmail.com>
2020-03-13 15:35:15 +07:00
Nguyen Dac Nam
70ff82f72d shader_decode: BFE add ref of reverse parallel method. 2020-03-13 14:20:18 +07:00
Nguyen Dac Nam
96a4abe12d shader_decode: implement BREV on BFE
Implement reverse parallel follow: https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
2020-03-13 14:13:31 +07:00
Nguyen Dac Nam
93547cac68 shader_bytecode: update BFE instructions struct. 2020-03-13 12:52:16 +07:00
Nguyen Dac Nam
911c56ccef node_helper: add IBitfieldExtract case 2020-03-13 12:50:32 +07:00
Nguyen Dac Nam
465ba30d08 shader_decode: Reimplement BFE instructions 2020-03-13 12:48:01 +07:00
ReinUsesLisp
e24197bb3f gl_shader_decompiler: Initialize gl_Position on vertex shaders 2020-03-12 23:31:06 -03:00
Fernando Sahmkow
00e9ba0603 Merge pull request #3483 from namkazt/patch-1
vk_rasterizer: fix mistype on SetupGraphicsImages
2020-03-12 22:10:48 -04:00
Fernando Sahmkow
f159a12820 Merge pull request #3480 from ReinUsesLisp/vk-disabled-ubo
vk_rasterizer: Support disabled uniform buffers
2020-03-12 22:09:49 -04:00
ReinUsesLisp
3a10016e38 gl_shader_decompiler: Add missing {} on smem GLSL emission 2020-03-12 21:50:37 -03:00
ReinUsesLisp
4dcca90ef4 video_core: Implement RGBA16_SNORM
Implement RGBA16_SNORM with the current API. Nothing special here.
2020-03-12 21:42:33 -03:00
ReinUsesLisp
e22816a5bb texture_cache: Report incompatible textures as black
Some games bind incompatible texture types to certain types.
For example Astral Chain binds a 2D texture with 1 layer (non-array) to
a cubemap slot (that's how it's used in the shader). After testing this
in hardware, the expected "undefined behavior" is to report all pixels
as black.

We already have a path for reporting black textures in the texture
cache. When textures types are incompatible, this commit binds these
kind of textures. This is done on the API agnostic texture cache so no
extra code has to be inserted on OpenGL or Vulkan.

As a side effect, this fixes invalidations of ASTC textures on Astral
Chain. This happened because yuzu detected a cube texture and forced
6 faces, generating a texture larger than what the TIC reported.
2020-03-12 18:22:05 -03:00
ReinUsesLisp
daae6a323b texture_cache/surface_params: Force depth=1 on 2D textures
Sometimes games will sample a 2D array TIC with a 2D access in the
shader. This causes bad interactions with the rest of the texture cache.
To emulate what the game wants to do, force a depth=1 on 2D textures
(not 2D arrays) and let the texture cache handle the rest.
2020-03-12 18:11:42 -03:00
ReinUsesLisp
38fe070d78 gl_shader_decompiler: Add layer component to texelFetch
TexelFetch was not emitting the array component generating invalid GLSL.
2020-03-12 18:10:29 -03:00
bunnei
ca2d228c9d Merge pull request #3497 from FernandoS27/microprogfile-extend
Small corrections and features to microprofile
2020-03-12 12:14:03 -04:00
bunnei
c21dc36cda Merge pull request #3496 from vitor-k/remove-enum
framebuffer_layout.h: drop the use of enum for screen dimensions
2020-03-12 12:00:39 -04:00
ReinUsesLisp
825d629565 gl_shader_decompiler: Fix regression in render target declarations
A previous commit introduced a way to declare as few render targets as
possible. Turns out this introduced a regression in some games.
2020-03-12 05:01:20 -03:00
Vitor Kiguchi
e891ff9a0c framebuffer_layout.h: drop the use of enum for screen dimensions.
+clang format
2020-03-11 14:22:28 -03:00
ReinUsesLisp
e4bc3c3342 gl_rasterizer: Implement polygon modes and fill rectangles 2020-03-09 20:39:58 -03:00
ReinUsesLisp
eb5861e0a2 engines/maxwell_3d: Add TFB registers and store them in shader registry 2020-03-09 18:40:53 -03:00
ReinUsesLisp
b1acb4f73f shader/registry: Address feedback 2020-03-09 18:40:53 -03:00
ReinUsesLisp
b1061afed9 gl_shader_decompiler: Add identifier to decompiled code 2020-03-09 18:40:53 -03:00
ReinUsesLisp
e612242977 gl_shader_decompiler: Roll back to GLSL core 430
RenderDoc won't build shaders if we use GLSL compatibility.
2020-03-09 18:40:53 -03:00
ReinUsesLisp
978172530e const_buffer_engine_interface: Store component types
This is required for Vulkan. Sampling integer textures with float
handles is illegal.
2020-03-09 18:40:53 -03:00
ReinUsesLisp
120f688272 yuzu/loading_screen: Remove unused shader progress mode 2020-03-09 18:40:53 -03:00
ReinUsesLisp
e1932351a9 gl_shader_cache: Reduce registry consistency to debug assert
Registry consistency is something that practically can't happen and it
has a measurable runtime cost. Reduce it to a DEBUG_ASSERT.
2020-03-09 18:40:07 -03:00
ReinUsesLisp
66a8a3e887 shader/registry: Cache tessellation state 2020-03-09 18:40:07 -03:00
ReinUsesLisp
0528be5c92 shader/registry: Store graphics and compute metadata
Store information GLSL forces us to provide but it's dynamic state in
hardware (workgroup sizes, primitive topology, shared memory size).
2020-03-09 18:40:07 -03:00
ReinUsesLisp
e8efd5a901 video_core: Rename "const buffer locker" to "registry" 2020-03-09 18:40:06 -03:00
ReinUsesLisp
bd8b9bbcee gl_shader_cache: Rework shader cache and remove post-specializations
Instead of pre-specializing shaders and then post-specializing them,
drop the later and only "specialize" the shader while decoding it.
2020-03-09 18:40:06 -03:00
Nguyen Dac Nam
16cfbb068c vk_reasterizer: fix mistype on SetupGraphicsImages
This should use Maxwell3D engine. Fixed some GPU error on Kirby and maybe other games.
2020-03-08 10:06:59 +07:00
ReinUsesLisp
e4f9ce0379 vk_rasterizer: Support disabled uniform buffers 2020-03-06 18:47:51 -03:00
65 changed files with 2109 additions and 1785 deletions

View File

@@ -57,8 +57,6 @@ set(HASH_FILES
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
"${VIDEO_CORE}/shader/decode/arithmetic.cpp"
"${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
"${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
@@ -91,8 +89,6 @@ set(HASH_FILES
"${VIDEO_CORE}/shader/ast.h"
"${VIDEO_CORE}/shader/compiler_settings.cpp"
"${VIDEO_CORE}/shader/compiler_settings.h"
"${VIDEO_CORE}/shader/const_buffer_locker.cpp"
"${VIDEO_CORE}/shader/const_buffer_locker.h"
"${VIDEO_CORE}/shader/control_flow.cpp"
"${VIDEO_CORE}/shader/control_flow.h"
"${VIDEO_CORE}/shader/decode.cpp"
@@ -101,9 +97,13 @@ set(HASH_FILES
"${VIDEO_CORE}/shader/node.h"
"${VIDEO_CORE}/shader/node_helper.cpp"
"${VIDEO_CORE}/shader/node_helper.h"
"${VIDEO_CORE}/shader/registry.cpp"
"${VIDEO_CORE}/shader/registry.h"
"${VIDEO_CORE}/shader/shader_ir.cpp"
"${VIDEO_CORE}/shader/shader_ir.h"
"${VIDEO_CORE}/shader/track.cpp"
"${VIDEO_CORE}/shader/transform_feedback.cpp"
"${VIDEO_CORE}/shader/transform_feedback.h"
)
set(COMBINED "")
foreach (F IN LISTS HASH_FILES)

View File

@@ -38,8 +38,6 @@ add_custom_command(OUTPUT scm_rev.cpp
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
"${VIDEO_CORE}/shader/decode/arithmetic.cpp"
"${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
"${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
@@ -72,8 +70,6 @@ add_custom_command(OUTPUT scm_rev.cpp
"${VIDEO_CORE}/shader/ast.h"
"${VIDEO_CORE}/shader/compiler_settings.cpp"
"${VIDEO_CORE}/shader/compiler_settings.h"
"${VIDEO_CORE}/shader/const_buffer_locker.cpp"
"${VIDEO_CORE}/shader/const_buffer_locker.h"
"${VIDEO_CORE}/shader/control_flow.cpp"
"${VIDEO_CORE}/shader/control_flow.h"
"${VIDEO_CORE}/shader/decode.cpp"
@@ -82,9 +78,13 @@ add_custom_command(OUTPUT scm_rev.cpp
"${VIDEO_CORE}/shader/node.h"
"${VIDEO_CORE}/shader/node_helper.cpp"
"${VIDEO_CORE}/shader/node_helper.h"
"${VIDEO_CORE}/shader/registry.cpp"
"${VIDEO_CORE}/shader/registry.h"
"${VIDEO_CORE}/shader/shader_ir.cpp"
"${VIDEO_CORE}/shader/shader_ir.h"
"${VIDEO_CORE}/shader/track.cpp"
"${VIDEO_CORE}/shader/transform_feedback.cpp"
"${VIDEO_CORE}/shader/transform_feedback.h"
# and also check that the scm_rev files haven't changed
"${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in"
"${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h"

View File

@@ -16,7 +16,6 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) {
pointers.resize(num_page_table_entries);
attributes.resize(num_page_table_entries);
backing_addr.resize(num_page_table_entries);
// The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
// vector size is subsequently decreased (via resize), the vector might not automatically
@@ -25,6 +24,17 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) {
pointers.shrink_to_fit();
attributes.shrink_to_fit();
}
BackingPageTable::BackingPageTable(std::size_t page_size_in_bits) : PageTable{page_size_in_bits} {}
BackingPageTable::~BackingPageTable() = default;
void BackingPageTable::Resize(std::size_t address_space_width_in_bits) {
PageTable::Resize(address_space_width_in_bits);
const std::size_t num_page_table_entries = 1ULL
<< (address_space_width_in_bits - page_size_in_bits);
backing_addr.resize(num_page_table_entries);
backing_addr.shrink_to_fit();
}

View File

@@ -76,9 +76,20 @@ struct PageTable {
*/
std::vector<PageType> attributes;
std::vector<u64> backing_addr;
const std::size_t page_size_in_bits{};
};
/**
* A more advanced Page Table with the ability to save a backing address when using it
* depends on another MMU.
*/
struct BackingPageTable : PageTable {
explicit BackingPageTable(std::size_t page_size_in_bits);
~BackingPageTable();
void Resize(std::size_t address_space_width_in_bits);
std::vector<u64> backing_addr;
};
} // namespace Common

View File

@@ -48,8 +48,8 @@ FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale) {
u32 width, height;
if (Settings::values.use_docked_mode) {
width = ScreenDocked::WidthDocked * res_scale;
height = ScreenDocked::HeightDocked * res_scale;
width = ScreenDocked::Width * res_scale;
height = ScreenDocked::Height * res_scale;
} else {
width = ScreenUndocked::Width * res_scale;
height = ScreenUndocked::Height * res_scale;

View File

@@ -8,15 +8,15 @@
namespace Layout {
enum ScreenUndocked : u32 {
Width = 1280,
Height = 720,
};
namespace ScreenUndocked {
constexpr u32 Width = 1280;
constexpr u32 Height = 720;
} // namespace ScreenUndocked
enum ScreenDocked : u32 {
WidthDocked = 1920,
HeightDocked = 1080,
};
namespace ScreenDocked {
constexpr u32 Width = 1920;
constexpr u32 Height = 1080;
} // namespace ScreenDocked
enum class AspectRatio {
Default,

View File

@@ -65,8 +65,6 @@ add_library(video_core STATIC
renderer_opengl/gl_shader_decompiler.h
renderer_opengl/gl_shader_disk_cache.cpp
renderer_opengl/gl_shader_disk_cache.h
renderer_opengl/gl_shader_gen.cpp
renderer_opengl/gl_shader_gen.h
renderer_opengl/gl_shader_manager.cpp
renderer_opengl/gl_shader_manager.h
renderer_opengl/gl_shader_util.cpp
@@ -118,8 +116,6 @@ add_library(video_core STATIC
shader/ast.h
shader/compiler_settings.cpp
shader/compiler_settings.h
shader/const_buffer_locker.cpp
shader/const_buffer_locker.h
shader/control_flow.cpp
shader/control_flow.h
shader/decode.cpp
@@ -128,9 +124,13 @@ add_library(video_core STATIC
shader/node_helper.cpp
shader/node_helper.h
shader/node.h
shader/registry.cpp
shader/registry.h
shader/shader_ir.cpp
shader/shader_ir.h
shader/track.cpp
shader/transform_feedback.cpp
shader/transform_feedback.h
surface.cpp
surface.h
texture_cache/format_lookup_table.cpp

View File

@@ -16,11 +16,12 @@ namespace Tegra::Engines {
struct SamplerDescriptor {
union {
BitField<0, 20, Tegra::Shader::TextureType> texture_type;
BitField<20, 1, u32> is_array;
BitField<21, 1, u32> is_buffer;
BitField<22, 1, u32> is_shadow;
u32 raw{};
u32 raw = 0;
BitField<0, 2, Tegra::Shader::TextureType> texture_type;
BitField<2, 3, Tegra::Texture::ComponentType> component_type;
BitField<5, 1, u32> is_array;
BitField<6, 1, u32> is_buffer;
BitField<7, 1, u32> is_shadow;
};
bool operator==(const SamplerDescriptor& rhs) const noexcept {
@@ -31,68 +32,48 @@ struct SamplerDescriptor {
return !operator==(rhs);
}
static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) {
static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) {
using Tegra::Shader::TextureType;
SamplerDescriptor result;
switch (tic_texture_type) {
// This is going to be used to determine the shading language type.
// Because of that we don't care about all component types on color textures.
result.component_type.Assign(tic.r_type.Value());
switch (tic.texture_type.Value()) {
case Tegra::Texture::TextureType::Texture1D:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
result.is_array.Assign(0);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
result.texture_type.Assign(TextureType::Texture1D);
return result;
case Tegra::Texture::TextureType::Texture2D:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
result.is_array.Assign(0);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
result.texture_type.Assign(TextureType::Texture2D);
return result;
case Tegra::Texture::TextureType::Texture3D:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D);
result.is_array.Assign(0);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
result.texture_type.Assign(TextureType::Texture3D);
return result;
case Tegra::Texture::TextureType::TextureCubemap:
result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube);
result.is_array.Assign(0);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
result.texture_type.Assign(TextureType::TextureCube);
return result;
case Tegra::Texture::TextureType::Texture1DArray:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
result.texture_type.Assign(TextureType::Texture1D);
result.is_array.Assign(1);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
return result;
case Tegra::Texture::TextureType::Texture2DArray:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
result.texture_type.Assign(TextureType::Texture2D);
result.is_array.Assign(1);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
return result;
case Tegra::Texture::TextureType::Texture1DBuffer:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
result.is_array.Assign(0);
result.texture_type.Assign(TextureType::Texture1D);
result.is_buffer.Assign(1);
result.is_shadow.Assign(0);
return result;
case Tegra::Texture::TextureType::Texture2DNoMipmap:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
result.is_array.Assign(0);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
result.texture_type.Assign(TextureType::Texture2D);
return result;
case Tegra::Texture::TextureType::TextureCubeArray:
result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube);
result.texture_type.Assign(TextureType::TextureCube);
result.is_array.Assign(1);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
return result;
default:
result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
result.is_array.Assign(0);
result.is_buffer.Assign(0);
result.is_shadow.Assign(0);
result.texture_type.Assign(TextureType::Texture2D);
return result;
}
}

View File

@@ -89,7 +89,7 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value());
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
return result;
}

View File

@@ -122,6 +122,13 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3
void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
const u32 method = method_call.method;
// Keep track of the register value in shadow_regs when requested.
if (method < Regs::NUM_REGS &&
(regs.shadow_ram_control == Regs::ShadowRamControl::Track ||
regs.shadow_ram_control == Regs::ShadowRamControl::TrackWithFilter)) {
shadow_regs.reg_array[method] = method_call.argument;
}
if (method == cb_data_state.current) {
regs.reg_array[method] = method_call.argument;
ProcessCBData(method_call.argument);
@@ -638,7 +645,7 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value());
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
return result;
}

View File

@@ -67,6 +67,7 @@ public:
static constexpr std::size_t NumVaryings = 31;
static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number
static constexpr std::size_t NumClipDistances = 8;
static constexpr std::size_t NumTransformFeedbackBuffers = 4;
static constexpr std::size_t MaxShaderProgram = 6;
static constexpr std::size_t MaxShaderStage = 5;
// Maximum number of const buffers per shader stage.
@@ -524,6 +525,24 @@ public:
FractionalEven = 2,
};
enum class PolygonMode : u32 {
Point = 0x1b00,
Line = 0x1b01,
Fill = 0x1b02,
};
// Thanks to fincs for reverse engineering
// MME shadow RAM and Ryujinx for providing
// the base for the implementation.
// https://github.com/fincs
// https://github.com/Ryujinx/Ryujinx/pull/987
enum class ShadowRamControl : u32 {
Track = 0,
TrackWithFilter = 1,
Passthrough = 2,
Replay = 3,
};
struct RenderTargetConfig {
u32 address_high;
u32 address_low;
@@ -621,6 +640,29 @@ public:
float depth_range_far;
};
struct TransformFeedbackBinding {
u32 buffer_enable;
u32 address_high;
u32 address_low;
s32 buffer_size;
s32 buffer_offset;
INSERT_UNION_PADDING_WORDS(3);
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
};
static_assert(sizeof(TransformFeedbackBinding) == 32);
struct TransformFeedbackLayout {
u32 stream;
u32 varying_count;
u32 stride;
INSERT_UNION_PADDING_WORDS(1);
};
static_assert(sizeof(TransformFeedbackLayout) == 16);
bool IsShaderConfigEnabled(std::size_t index) const {
// The VertexB is always enabled.
if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) {
@@ -629,6 +671,10 @@ public:
return shader_config[index].enable != 0;
}
bool IsShaderConfigEnabled(Regs::ShaderProgram type) const {
return IsShaderConfigEnabled(static_cast<std::size_t>(type));
}
union {
struct {
INSERT_UNION_PADDING_WORDS(0x45);
@@ -640,7 +686,9 @@ public:
u32 bind;
} macros;
INSERT_UNION_PADDING_WORDS(0x17);
ShadowRamControl shadow_ram_control;
INSERT_UNION_PADDING_WORDS(0x16);
Upload::Registers upload;
struct {
@@ -677,7 +725,13 @@ public:
u32 rasterize_enable;
INSERT_UNION_PADDING_WORDS(0xF1);
std::array<TransformFeedbackBinding, NumTransformFeedbackBuffers> tfb_bindings;
INSERT_UNION_PADDING_WORDS(0xC0);
std::array<TransformFeedbackLayout, NumTransformFeedbackBuffers> tfb_layouts;
INSERT_UNION_PADDING_WORDS(0x1);
u32 tfb_enabled;
@@ -705,7 +759,12 @@ public:
s32 clear_stencil;
INSERT_UNION_PADDING_WORDS(0x7);
INSERT_UNION_PADDING_WORDS(0x2);
PolygonMode polygon_mode_front;
PolygonMode polygon_mode_back;
INSERT_UNION_PADDING_WORDS(0x3);
u32 polygon_offset_point_enable;
u32 polygon_offset_line_enable;
@@ -764,7 +823,11 @@ public:
BitField<12, 4, u32> viewport;
} clear_flags;
INSERT_UNION_PADDING_WORDS(0x19);
INSERT_UNION_PADDING_WORDS(0x10);
u32 fill_rectangle;
INSERT_UNION_PADDING_WORDS(0x8);
std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
@@ -1187,7 +1250,11 @@ public:
u32 tex_cb_index;
INSERT_UNION_PADDING_WORDS(0x395);
INSERT_UNION_PADDING_WORDS(0x7D);
std::array<std::array<u8, 128>, NumTransformFeedbackBuffers> tfb_varying_locs;
INSERT_UNION_PADDING_WORDS(0x298);
struct {
/// Compressed address of a buffer that holds information about bound SSBOs.
@@ -1210,7 +1277,10 @@ public:
};
std::array<u32, NUM_REGS> reg_array;
};
} regs{};
};
Regs regs{};
Regs shadow_regs{};
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
@@ -1405,6 +1475,7 @@ private:
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(macros, 0x45);
ASSERT_REG_POSITION(shadow_ram_control, 0x49);
ASSERT_REG_POSITION(upload, 0x60);
ASSERT_REG_POSITION(exec_upload, 0x6C);
ASSERT_REG_POSITION(data_upload, 0x6D);
@@ -1413,6 +1484,8 @@ ASSERT_REG_POSITION(tess_mode, 0xC8);
ASSERT_REG_POSITION(tess_level_outer, 0xC9);
ASSERT_REG_POSITION(tess_level_inner, 0xCD);
ASSERT_REG_POSITION(rasterize_enable, 0xDF);
ASSERT_REG_POSITION(tfb_bindings, 0xE0);
ASSERT_REG_POSITION(tfb_layouts, 0x1C0);
ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
ASSERT_REG_POSITION(rt, 0x200);
ASSERT_REG_POSITION(viewport_transform, 0x280);
@@ -1422,6 +1495,8 @@ ASSERT_REG_POSITION(depth_mode, 0x35F);
ASSERT_REG_POSITION(clear_color[0], 0x360);
ASSERT_REG_POSITION(clear_depth, 0x364);
ASSERT_REG_POSITION(clear_stencil, 0x368);
ASSERT_REG_POSITION(polygon_mode_front, 0x36B);
ASSERT_REG_POSITION(polygon_mode_back, 0x36C);
ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
@@ -1435,6 +1510,7 @@ ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
ASSERT_REG_POSITION(depth_bounds, 0x3E7);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(clear_flags, 0x43E);
ASSERT_REG_POSITION(fill_rectangle, 0x44F);
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
ASSERT_REG_POSITION(rt_control, 0x487);
ASSERT_REG_POSITION(zeta_width, 0x48a);
@@ -1508,6 +1584,7 @@ ASSERT_REG_POSITION(firmware, 0x8C0);
ASSERT_REG_POSITION(const_buffer, 0x8E0);
ASSERT_REG_POSITION(cb_bind[0], 0x904);
ASSERT_REG_POSITION(tex_cb_index, 0x982);
ASSERT_REG_POSITION(tfb_varying_locs, 0xA00);
ASSERT_REG_POSITION(ssbo_info, 0xD18);
ASSERT_REG_POSITION(tex_info_buffers.address[0], 0xD2A);
ASSERT_REG_POSITION(tex_info_buffers.size[0], 0xD2F);

View File

@@ -911,14 +911,9 @@ union Instruction {
} fadd32i;
union {
BitField<20, 8, u64> shift_position;
BitField<28, 8, u64> shift_length;
BitField<48, 1, u64> negate_b;
BitField<49, 1, u64> negate_a;
u64 GetLeftShiftValue() const {
return 32 - (shift_position + shift_length);
}
BitField<40, 1, u64> brev;
BitField<47, 1, u64> rd_cc;
BitField<48, 1, u64> is_signed;
} bfe;
union {

View File

@@ -39,6 +39,7 @@ enum class RenderTargetFormat : u32 {
RGBA32_FLOAT = 0xC0,
RGBA32_UINT = 0xC2,
RGBA16_UNORM = 0xC6,
RGBA16_SNORM = 0xC7,
RGBA16_UINT = 0xC9,
RGBA16_FLOAT = 0xCA,
RG32_FLOAT = 0xCB,

View File

@@ -4,13 +4,15 @@
#include <algorithm>
#include <limits>
#include <vector>
#include "common/common_types.h"
#include "video_core/guest_driver.h"
namespace VideoCore {
void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) {
if (texture_handler_size_deduced) {
void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) {
if (texture_handler_size) {
return;
}
const std::size_t size = bound_offsets.size();
@@ -29,7 +31,6 @@ void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offse
if (min_val > 2) {
return;
}
texture_handler_size_deduced = true;
texture_handler_size = min_texture_handler_size * min_val;
}

View File

@@ -4,6 +4,7 @@
#pragma once
#include <optional>
#include <vector>
#include "common/common_types.h"
@@ -17,25 +18,29 @@ namespace VideoCore {
*/
class GuestDriverProfile {
public:
void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets);
explicit GuestDriverProfile() = default;
explicit GuestDriverProfile(std::optional<u32> texture_handler_size)
: texture_handler_size{texture_handler_size} {}
void DeduceTextureHandlerSize(std::vector<u32> bound_offsets);
u32 GetTextureHandlerSize() const {
return texture_handler_size;
return texture_handler_size.value_or(default_texture_handler_size);
}
bool TextureHandlerSizeKnown() const {
return texture_handler_size_deduced;
bool IsTextureHandlerSizeKnown() const {
return texture_handler_size.has_value();
}
private:
// Minimum size of texture handler any driver can use.
static constexpr u32 min_texture_handler_size = 4;
// This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily
// use 4 bytes instead. Thus, certain drivers may squish the size.
// This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead.
// Thus, certain drivers may squish the size.
static constexpr u32 default_texture_handler_size = 8;
u32 texture_handler_size = default_texture_handler_size;
bool texture_handler_size_deduced = false;
std::optional<u32> texture_handler_size = default_texture_handler_size;
};
} // namespace VideoCore

View File

@@ -328,6 +328,12 @@ void MacroInterpreter::SetMethodAddress(u32 address) {
}
void MacroInterpreter::Send(u32 value) {
// Use the tracked value in shadow_regs when requested.
if (method_address.address < Engines::Maxwell3D::Regs::NUM_REGS &&
maxwell3d.regs.shadow_ram_control == Engines::Maxwell3D::Regs::ShadowRamControl::Replay) {
value = maxwell3d.shadow_regs.reg_array[method_address.address];
}
maxwell3d.CallMethodFromMME({method_address.address, value});
// Increment the method address by the method increment.
method_address.address.Assign(method_address.address.Value() +

View File

@@ -174,7 +174,7 @@ private:
/// End of address space, based on address space in bits.
static constexpr GPUVAddr address_space_end{1ULL << address_space_width};
Common::PageTable page_table{page_bits};
Common::BackingPageTable page_table{page_bits};
VMAMap vma_map;
VideoCore::RasterizerInterface& rasterizer;

View File

@@ -51,6 +51,7 @@ static constexpr ConversionArray morton_to_linear_fns = {
MortonCopy<true, PixelFormat::R8UI>,
MortonCopy<true, PixelFormat::RGBA16F>,
MortonCopy<true, PixelFormat::RGBA16U>,
MortonCopy<true, PixelFormat::RGBA16S>,
MortonCopy<true, PixelFormat::RGBA16UI>,
MortonCopy<true, PixelFormat::R11FG11FB10F>,
MortonCopy<true, PixelFormat::RGBA32UI>,
@@ -131,6 +132,7 @@ static constexpr ConversionArray linear_to_morton_fns = {
MortonCopy<false, PixelFormat::R8U>,
MortonCopy<false, PixelFormat::R8UI>,
MortonCopy<false, PixelFormat::RGBA16F>,
MortonCopy<false, PixelFormat::RGBA16S>,
MortonCopy<false, PixelFormat::RGBA16U>,
MortonCopy<false, PixelFormat::RGBA16UI>,
MortonCopy<false, PixelFormat::R11FG11FB10F>,

View File

@@ -25,7 +25,6 @@ constexpr std::size_t NumQueryTypes = 1;
enum class LoadCallbackStage {
Prepare,
Decompile,
Build,
Complete,
};

View File

@@ -28,7 +28,6 @@
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
@@ -76,7 +75,7 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
}
std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
const GLShader::ConstBufferEntry& entry) {
const ConstBufferEntry& entry) {
if (!entry.IsIndirect()) {
return entry.GetSize();
}
@@ -272,9 +271,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
SetupDrawTextures(stage, shader);
SetupDrawImages(stage, shader);
const ProgramVariant variant(primitive_mode);
const auto program_handle = shader->GetHandle(variant);
const GLuint program_handle = shader->GetHandle();
switch (program) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB:
@@ -295,7 +292,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
// When a clip distance is enabled but not set in the shader it crops parts of the screen
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
// clip distances only when it's written by a shader stage.
clip_distances |= shader->GetShaderEntries().clip_distances;
clip_distances |= shader->GetEntries().clip_distances;
// When VertexA is enabled, we have dual vertex shaders
if (program == Maxwell::ShaderProgram::VertexA) {
@@ -487,6 +484,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
SyncViewport();
SyncRasterizeEnable();
SyncPolygonModes();
SyncColorMask();
SyncFragmentColorClampState();
SyncMultiSampleState();
@@ -498,7 +496,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
SyncCullMode();
SyncPrimitiveRestart();
SyncScissorTest();
SyncTransformFeedback();
SyncPointState();
SyncPolygonOffset();
SyncAlphaTest();
@@ -571,7 +568,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
glTextureBarrier();
}
++num_queued_commands;
BeginTransformFeedback(primitive_mode);
const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
const GLsizei num_instances =
@@ -610,6 +607,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
num_instances, base_instance);
}
}
EndTransformFeedback();
++num_queued_commands;
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -622,12 +623,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
auto kernel = shader_cache.GetComputeKernel(code_addr);
SetupComputeTextures(kernel);
SetupComputeImages(kernel);
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
launch_desc.block_dim_z, launch_desc.shared_alloc,
launch_desc.local_pos_alloc);
program_manager.BindComputeShader(kernel->GetHandle(variant));
program_manager.BindComputeShader(kernel->GetHandle());
const std::size_t buffer_size =
Tegra::Engines::KeplerCompute::NumConstBuffers *
@@ -645,6 +641,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
bind_ubo_pushbuffer.Bind();
bind_ssbo_pushbuffer.Bind();
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
++num_queued_commands;
}
@@ -749,7 +746,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
const auto& shader_stage = stages[stage_index];
u32 binding = device.GetBaseBindings(stage_index).uniform_buffer;
for (const auto& entry : shader->GetShaderEntries().const_buffers) {
for (const auto& entry : shader->GetEntries().const_buffers) {
const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
SetupConstBuffer(binding++, buffer, entry);
}
@@ -760,7 +757,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
u32 binding = 0;
for (const auto& entry : kernel->GetShaderEntries().const_buffers) {
for (const auto& entry : kernel->GetEntries().const_buffers) {
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
Tegra::Engines::ConstBufferInfo buffer;
@@ -772,7 +769,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
}
void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
const GLShader::ConstBufferEntry& entry) {
const ConstBufferEntry& entry) {
if (!buffer.enabled) {
// Set values to zero to unbind buffers
bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
@@ -796,7 +793,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer;
for (const auto& entry : shader->GetShaderEntries().global_memory_entries) {
for (const auto& entry : shader->GetEntries().global_memory_entries) {
const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
const auto gpu_addr{memory_manager.Read<u64>(addr)};
const auto size{memory_manager.Read<u32>(addr + 8)};
@@ -810,7 +807,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
u32 binding = 0;
for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) {
for (const auto& entry : kernel->GetEntries().global_memory_entries) {
const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
const auto gpu_addr{memory_manager.Read<u64>(addr)};
const auto size{memory_manager.Read<u32>(addr + 8)};
@@ -818,7 +815,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
}
}
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry,
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
GPUVAddr gpu_addr, std::size_t size) {
const auto alignment{device.GetShaderStorageBufferAlignment()};
const auto [ssbo, buffer_offset] =
@@ -830,7 +827,7 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& maxwell3d = system.GPU().Maxwell3D();
u32 binding = device.GetBaseBindings(stage_index).sampler;
for (const auto& entry : shader->GetShaderEntries().samplers) {
for (const auto& entry : shader->GetEntries().samplers) {
const auto shader_type = static_cast<ShaderType>(stage_index);
for (std::size_t i = 0; i < entry.Size(); ++i) {
const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
@@ -843,7 +840,7 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0;
for (const auto& entry : kernel->GetShaderEntries().samplers) {
for (const auto& entry : kernel->GetEntries().samplers) {
for (std::size_t i = 0; i < entry.Size(); ++i) {
const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i);
SetupTexture(binding++, texture, entry);
@@ -852,7 +849,7 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
}
void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
const GLShader::SamplerEntry& entry) {
const SamplerEntry& entry) {
const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
if (!view) {
// Can occur when texture addr is null or its memory is unmapped/invalid
@@ -875,7 +872,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
const auto& maxwell3d = system.GPU().Maxwell3D();
u32 binding = device.GetBaseBindings(stage_index).image;
for (const auto& entry : shader->GetShaderEntries().images) {
for (const auto& entry : shader->GetEntries().images) {
const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
SetupImage(binding++, tic, entry);
@@ -885,14 +882,14 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh
void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0;
for (const auto& entry : shader->GetShaderEntries().images) {
for (const auto& entry : shader->GetEntries().images) {
const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic;
SetupImage(binding++, tic, entry);
}
}
void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
const GLShader::ImageEntry& entry) {
const ImageEntry& entry) {
const auto view = texture_cache.GetImageSurface(tic, entry);
if (!view) {
glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
@@ -1096,6 +1093,45 @@ void RasterizerOpenGL::SyncRasterizeEnable() {
oglEnable(GL_RASTERIZER_DISCARD, gpu.regs.rasterize_enable == 0);
}
void RasterizerOpenGL::SyncPolygonModes() {
auto& gpu = system.GPU().Maxwell3D();
auto& flags = gpu.dirty.flags;
if (!flags[Dirty::PolygonModes]) {
return;
}
flags[Dirty::PolygonModes] = false;
if (gpu.regs.fill_rectangle) {
if (!GLAD_GL_NV_fill_rectangle) {
LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used and not supported");
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
return;
}
flags[Dirty::PolygonModeFront] = true;
flags[Dirty::PolygonModeBack] = true;
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL_RECTANGLE_NV);
return;
}
if (gpu.regs.polygon_mode_front == gpu.regs.polygon_mode_back) {
flags[Dirty::PolygonModeFront] = false;
flags[Dirty::PolygonModeBack] = false;
glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
return;
}
if (flags[Dirty::PolygonModeFront]) {
flags[Dirty::PolygonModeFront] = false;
glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
}
if (flags[Dirty::PolygonModeBack]) {
flags[Dirty::PolygonModeBack] = false;
glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_back));
}
}
void RasterizerOpenGL::SyncColorMask() {
auto& gpu = system.GPU().Maxwell3D();
auto& flags = gpu.dirty.flags;
@@ -1257,11 +1293,6 @@ void RasterizerOpenGL::SyncScissorTest() {
}
}
void RasterizerOpenGL::SyncTransformFeedback() {
const auto& regs = system.GPU().Maxwell3D().regs;
UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
}
void RasterizerOpenGL::SyncPointState() {
auto& gpu = system.GPU().Maxwell3D();
auto& flags = gpu.dirty.flags;
@@ -1337,4 +1368,62 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
}
void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
const auto& regs = system.GPU().Maxwell3D().regs;
if (regs.tfb_enabled == 0) {
return;
}
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
const auto& binding = regs.tfb_bindings[index];
if (!binding.buffer_enable) {
if (enabled_transform_feedback_buffers[index]) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
0);
}
enabled_transform_feedback_buffers[index] = false;
continue;
}
enabled_transform_feedback_buffers[index] = true;
auto& tfb_buffer = transform_feedback_buffers[index];
tfb_buffer.Create();
const GLuint handle = tfb_buffer.handle;
const std::size_t size = binding.buffer_size;
glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
static_cast<GLsizeiptr>(size));
}
glBeginTransformFeedback(GL_POINTS);
}
void RasterizerOpenGL::EndTransformFeedback() {
const auto& regs = system.GPU().Maxwell3D().regs;
if (regs.tfb_enabled == 0) {
return;
}
glEndTransformFeedback();
for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
const auto& binding = regs.tfb_bindings[index];
if (!binding.buffer_enable) {
continue;
}
UNIMPLEMENTED_IF(binding.buffer_offset != 0);
const GLuint handle = transform_feedback_buffers[index].handle;
const GPUVAddr gpu_addr = binding.Address();
const std::size_t size = binding.buffer_size;
const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size));
}
}
} // namespace OpenGL

View File

@@ -98,7 +98,7 @@ private:
/// Configures a constant buffer.
void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
const GLShader::ConstBufferEntry& entry);
const ConstBufferEntry& entry);
/// Configures the current global memory entries to use for the draw command.
void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
@@ -107,7 +107,7 @@ private:
void SetupComputeGlobalMemory(const Shader& kernel);
/// Configures a constant buffer.
void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
std::size_t size);
/// Configures the current textures to use for the draw command.
@@ -118,7 +118,7 @@ private:
/// Configures a texture.
void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
const GLShader::SamplerEntry& entry);
const SamplerEntry& entry);
/// Configures images in a graphics shader.
void SetupDrawImages(std::size_t stage_index, const Shader& shader);
@@ -127,8 +127,7 @@ private:
void SetupComputeImages(const Shader& shader);
/// Configures an image.
void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
const GLShader::ImageEntry& entry);
void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
/// Syncs the viewport and depth range to match the guest state
void SyncViewport();
@@ -169,15 +168,15 @@ private:
/// Syncs the scissor test state to match the guest state
void SyncScissorTest();
/// Syncs the transform feedback state to match the guest state
void SyncTransformFeedback();
/// Syncs the point state to match the guest state
void SyncPointState();
/// Syncs the rasterizer enable state to match the guest state
void SyncRasterizeEnable();
/// Syncs polygon modes to match the guest state
void SyncPolygonModes();
/// Syncs Color Mask
void SyncColorMask();
@@ -190,6 +189,12 @@ private:
/// Syncs the framebuffer sRGB state to match the guest state
void SyncFramebufferSRGB();
/// Begin a transform feedback
void BeginTransformFeedback(GLenum primitive_mode);
/// End a transform feedback
void EndTransformFeedback();
/// Check for extension that are not strictly required but are needed for correct emulation
void CheckExtensions();
@@ -227,6 +232,11 @@ private:
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
transform_feedback_buffers;
std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
enabled_transform_feedback_buffers;
/// Number of commands queued to the OpenGL driver. Reseted on flush.
std::size_t num_queued_commands = 0;

View File

@@ -2,12 +2,16 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <atomic>
#include <functional>
#include <mutex>
#include <optional>
#include <string>
#include <thread>
#include <unordered_set>
#include <boost/functional/hash.hpp>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
@@ -24,13 +28,14 @@
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace OpenGL {
using Tegra::Engines::ShaderType;
using VideoCommon::Shader::ConstBufferLocker;
using VideoCommon::Shader::ProgramCode;
using VideoCommon::Shader::Registry;
using VideoCommon::Shader::ShaderIR;
namespace {
@@ -56,7 +61,7 @@ constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
}
/// Calculates the size of a program stream
std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
std::size_t CalculateProgramSize(const ProgramCode& program) {
constexpr std::size_t start_offset = 10;
// This is the encoded version of BRA that jumps to itself. All Nvidia
// shaders end with one.
@@ -109,32 +114,9 @@ constexpr GLenum GetGLShaderType(ShaderType shader_type) {
}
}
/// Describes primitive behavior on geometry shaders
constexpr std::pair<const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
switch (primitive_mode) {
case GL_POINTS:
return {"points", 1};
case GL_LINES:
case GL_LINE_STRIP:
return {"lines", 2};
case GL_LINES_ADJACENCY:
case GL_LINE_STRIP_ADJACENCY:
return {"lines_adjacency", 4};
case GL_TRIANGLES:
case GL_TRIANGLE_STRIP:
case GL_TRIANGLE_FAN:
return {"triangles", 3};
case GL_TRIANGLES_ADJACENCY:
case GL_TRIANGLE_STRIP_ADJACENCY:
return {"triangles_adjacency", 6};
default:
return {"points", 1};
}
}
/// Hashes one (or two) program streams
u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code,
const ProgramCode& code_b) {
const ProgramCode& code_b = {}) {
u64 unique_identifier = boost::hash_value(code);
if (is_a) {
// VertexA programs include two programs
@@ -143,24 +125,6 @@ u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& co
return unique_identifier;
}
/// Creates an unspecialized program from code streams
std::string GenerateGLSL(const Device& device, ShaderType shader_type, const ShaderIR& ir,
const std::optional<ShaderIR>& ir_b) {
switch (shader_type) {
case ShaderType::Vertex:
return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr);
case ShaderType::Geometry:
return GLShader::GenerateGeometryShader(device, ir);
case ShaderType::Fragment:
return GLShader::GenerateFragmentShader(device, ir);
case ShaderType::Compute:
return GLShader::GenerateComputeShader(device, ir);
default:
UNIMPLEMENTED_MSG("Unimplemented shader_type={}", static_cast<u32>(shader_type));
return {};
}
}
constexpr const char* GetShaderTypeName(ShaderType shader_type) {
switch (shader_type) {
case ShaderType::Vertex:
@@ -196,102 +160,38 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
return {};
}
std::string GetShaderId(u64 unique_identifier, ShaderType shader_type) {
std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
}
Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(Core::System& system,
ShaderType shader_type) {
if (shader_type == ShaderType::Compute) {
return system.GPU().KeplerCompute();
} else {
return system.GPU().Maxwell3D();
std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
entry.graphics_info, entry.compute_info};
const auto registry = std::make_shared<Registry>(entry.type, info);
for (const auto& [address, value] : entry.keys) {
const auto [buffer, offset] = address;
registry->InsertKey(buffer, offset, value);
}
}
std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType shader_type) {
return std::make_unique<ConstBufferLocker>(shader_type,
GetConstBufferEngineInterface(system, shader_type));
}
void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
locker.SetBoundBuffer(usage.bound_buffer);
for (const auto& key : usage.keys) {
const auto [buffer, offset] = key.first;
locker.InsertKey(buffer, offset, key.second);
for (const auto& [offset, sampler] : entry.bound_samplers) {
registry->InsertBoundSampler(offset, sampler);
}
for (const auto& [offset, sampler] : usage.bound_samplers) {
locker.InsertBoundSampler(offset, sampler);
}
for (const auto& [key, sampler] : usage.bindless_samplers) {
for (const auto& [key, sampler] : entry.bindless_samplers) {
const auto [buffer, offset] = key;
locker.InsertBindlessSampler(buffer, offset, sampler);
registry->InsertBindlessSampler(buffer, offset, sampler);
}
return registry;
}
CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderType shader_type,
const ProgramCode& code, const ProgramCode& code_b,
ConstBufferLocker& locker, const ProgramVariant& variant,
bool hint_retrievable = false) {
LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, shader_type));
const bool is_compute = shader_type == ShaderType::Compute;
const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
const ShaderIR ir(code, main_offset, COMPILER_SETTINGS, locker);
std::optional<ShaderIR> ir_b;
if (!code_b.empty()) {
ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker);
}
std::string source = fmt::format(R"(// {}
#version 430 core
#extension GL_ARB_separate_shader_objects : enable
)",
GetShaderId(unique_identifier, shader_type));
if (device.HasShaderBallot()) {
source += "#extension GL_ARB_shader_ballot : require\n";
}
if (device.HasVertexViewportLayer()) {
source += "#extension GL_ARB_shader_viewport_layer_array : require\n";
}
if (device.HasImageLoadFormatted()) {
source += "#extension GL_EXT_shader_image_load_formatted : require\n";
}
if (device.HasWarpIntrinsics()) {
source += "#extension GL_NV_gpu_shader5 : require\n"
"#extension GL_NV_shader_thread_group : require\n"
"#extension GL_NV_shader_thread_shuffle : require\n";
}
// This pragma stops Nvidia's driver from over optimizing math (probably using fp16 operations)
// on places where we don't want to.
// Thanks to Ryujinx for finding this workaround.
source += "#pragma optionNV(fastmath off)\n";
if (shader_type == ShaderType::Geometry) {
const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(variant.primitive_mode);
source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices);
source += fmt::format("layout ({}) in;\n", glsl_topology);
}
if (shader_type == ShaderType::Compute) {
if (variant.local_memory_size > 0) {
source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n",
Common::AlignUp(variant.local_memory_size, 4) / 4);
}
source +=
fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n",
variant.block_x, variant.block_y, variant.block_z);
if (variant.shared_memory_size > 0) {
// shared_memory_size is described in number of words
source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size);
}
}
source += '\n';
source += GenerateGLSL(device, shader_type, ir, ir_b);
std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type,
u64 unique_identifier, const ShaderIR& ir,
const Registry& registry, bool hint_retrievable = false) {
const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
LOG_INFO(Render_OpenGL, "{}", shader_id);
const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
OGLShader shader;
shader.Create(source.c_str(), GetGLShaderType(shader_type));
shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
auto program = std::make_shared<OGLProgram>();
program->Create(true, hint_retrievable, shader.handle);
@@ -299,7 +199,7 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp
}
std::unordered_set<GLenum> GetSupportedFormats() {
GLint num_formats{};
GLint num_formats;
glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
std::vector<GLint> formats(num_formats);
@@ -314,115 +214,82 @@ std::unordered_set<GLenum> GetSupportedFormats() {
} // Anonymous namespace
CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type,
GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b)
: RasterizerCacheObject{params.host_ptr}, system{params.system},
disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr},
unique_identifier{params.unique_identifier}, shader_type{shader_type},
entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} {
if (!params.precompiled_variants) {
return;
}
for (const auto& pair : *params.precompiled_variants) {
auto locker = MakeLocker(system, shader_type);
const auto& usage = pair->first;
FillLocker(*locker, usage);
CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
std::shared_ptr<VideoCommon::Shader::Registry> registry,
ShaderEntries entries, std::shared_ptr<OGLProgram> program)
: RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)},
cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {}
std::unique_ptr<LockerVariant>* locker_variant = nullptr;
const auto it =
std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) {
return variant->locker->HasEqualKeys(*locker);
});
if (it == locker_variants.end()) {
locker_variant = &locker_variants.emplace_back();
*locker_variant = std::make_unique<LockerVariant>();
locker_variant->get()->locker = std::move(locker);
} else {
locker_variant = &*it;
}
locker_variant->get()->programs.emplace(usage.variant, pair->second);
}
CachedShader::~CachedShader() = default;
GLuint CachedShader::GetHandle() const {
DEBUG_ASSERT(registry->IsConsistent());
return program->handle;
}
Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
Maxwell::ShaderProgram program_type, ProgramCode code,
ProgramCode code_b) {
const auto shader_type = GetShaderType(program_type);
params.disk_cache.SaveRaw(
ShaderDiskCacheRaw(params.unique_identifier, shader_type, code, code_b));
const std::size_t size_in_bytes = code.size() * sizeof(u64);
ConstBufferLocker locker(shader_type, params.system.GPU().Maxwell3D());
const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker);
auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D());
const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
// TODO(Rodrigo): Handle VertexA shaders
// std::optional<ShaderIR> ir_b;
// if (!code_b.empty()) {
// ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
// }
return std::shared_ptr<CachedShader>(new CachedShader(
params, shader_type, GLShader::GetEntries(ir), std::move(code), std::move(code_b)));
auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
ShaderDiskCacheEntry entry;
entry.type = shader_type;
entry.code = std::move(code);
entry.code_b = std::move(code_b);
entry.unique_identifier = params.unique_identifier;
entry.bound_buffer = registry->GetBoundBuffer();
entry.graphics_info = registry->GetGraphicsInfo();
entry.keys = registry->GetKeys();
entry.bound_samplers = registry->GetBoundSamplers();
entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry));
return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
size_in_bytes, std::move(registry),
MakeEntries(ir), std::move(program)));
}
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
params.disk_cache.SaveRaw(
ShaderDiskCacheRaw(params.unique_identifier, ShaderType::Compute, code));
const std::size_t size_in_bytes = code.size() * sizeof(u64);
ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute,
params.system.GPU().KeplerCompute());
const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker);
return std::shared_ptr<CachedShader>(new CachedShader(
params, ShaderType::Compute, GLShader::GetEntries(ir), std::move(code), {}));
auto& engine = params.system.GPU().KeplerCompute();
auto registry = std::make_shared<Registry>(ShaderType::Compute, engine);
const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
const u64 uid = params.unique_identifier;
auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
ShaderDiskCacheEntry entry;
entry.type = ShaderType::Compute;
entry.code = std::move(code);
entry.unique_identifier = uid;
entry.bound_buffer = registry->GetBoundBuffer();
entry.compute_info = registry->GetComputeInfo();
entry.keys = registry->GetKeys();
entry.bound_samplers = registry->GetBoundSamplers();
entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry));
return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
size_in_bytes, std::move(registry),
MakeEntries(ir), std::move(program)));
}
Shader CachedShader::CreateFromCache(const ShaderParameters& params,
const UnspecializedShader& unspecialized) {
return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.type,
unspecialized.entries, unspecialized.code,
unspecialized.code_b));
}
GLuint CachedShader::GetHandle(const ProgramVariant& variant) {
EnsureValidLockerVariant();
const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant);
auto& program = entry->second;
if (!is_cache_miss) {
return program->handle;
}
program = BuildShader(device, unique_identifier, shader_type, code, code_b,
*curr_locker_variant->locker, variant);
disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker));
LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
return program->handle;
}
bool CachedShader::EnsureValidLockerVariant() {
const auto previous_variant = curr_locker_variant;
if (curr_locker_variant && !curr_locker_variant->locker->IsConsistent()) {
curr_locker_variant = nullptr;
}
if (!curr_locker_variant) {
for (auto& variant : locker_variants) {
if (variant->locker->IsConsistent()) {
curr_locker_variant = variant.get();
}
}
}
if (!curr_locker_variant) {
auto& new_variant = locker_variants.emplace_back();
new_variant = std::make_unique<LockerVariant>();
new_variant->locker = MakeLocker(system, shader_type);
curr_locker_variant = new_variant.get();
}
return previous_variant == curr_locker_variant;
}
ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,
const ConstBufferLocker& locker) const {
return ShaderDiskCacheUsage{unique_identifier, variant,
locker.GetBoundBuffer(), locker.GetKeys(),
locker.GetBoundSamplers(), locker.GetBindlessSamplers()};
const PrecompiledShader& precompiled_shader,
std::size_t size_in_bytes) {
return std::shared_ptr<CachedShader>(new CachedShader(
params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry,
precompiled_shader.entries, precompiled_shader.program));
}
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -432,16 +299,12 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System&
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
const auto transferable = disk_cache.LoadTransferable();
const std::optional transferable = disk_cache.LoadTransferable();
if (!transferable) {
return;
}
const auto [raws, shader_usages] = *transferable;
if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) {
return;
}
const auto dumps = disk_cache.LoadPrecompiled();
const std::vector gl_cache = disk_cache.LoadPrecompiled();
const auto supported_formats = GetSupportedFormats();
// Track if precompiled cache was altered during loading to know if we have to
@@ -450,77 +313,82 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
// Inform the frontend about shader build initialization
if (callback) {
callback(VideoCore::LoadCallbackStage::Build, 0, shader_usages.size());
callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size());
}
std::mutex mutex;
std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex
std::atomic_bool compilation_failed = false;
std::atomic_bool gl_cache_failed = false;
const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
std::size_t end, const std::vector<ShaderDiskCacheUsage>& shader_usages,
const ShaderDumpsMap& dumps) {
const auto find_precompiled = [&gl_cache](u64 id) {
return std::find_if(gl_cache.begin(), gl_cache.end(),
[id](const auto& entry) { return entry.unique_identifier == id; });
};
const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
std::size_t end) {
context->MakeCurrent();
SCOPE_EXIT({ return context->DoneCurrent(); });
for (std::size_t i = begin; i < end; ++i) {
if (stop_loading || compilation_failed) {
if (stop_loading) {
return;
}
const auto& usage{shader_usages[i]};
const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)};
const auto dump{dumps.find(usage)};
const auto& entry = (*transferable)[i];
const u64 uid = entry.unique_identifier;
const auto it = find_precompiled(uid);
const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr;
CachedProgram shader;
if (dump != dumps.end()) {
// If the shader is dumped, attempt to load it with
shader = GeneratePrecompiledProgram(dump->second, supported_formats);
if (!shader) {
compilation_failed = true;
return;
const bool is_compute = entry.type == ShaderType::Compute;
const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
auto registry = MakeRegistry(entry);
const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
std::shared_ptr<OGLProgram> program;
if (precompiled_entry) {
// If the shader is precompiled, attempt to load it with
program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
if (!program) {
gl_cache_failed = true;
}
}
if (!shader) {
auto locker{MakeLocker(system, unspecialized.type)};
FillLocker(*locker, usage);
shader = BuildShader(device, usage.unique_identifier, unspecialized.type,
unspecialized.code, unspecialized.code_b, *locker,
usage.variant, true);
if (!program) {
// Otherwise compile it from GLSL
program = BuildShader(device, entry.type, uid, ir, *registry, true);
}
PrecompiledShader shader;
shader.program = std::move(program);
shader.registry = std::move(registry);
shader.entries = MakeEntries(ir);
std::scoped_lock lock{mutex};
if (callback) {
callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
shader_usages.size());
transferable->size());
}
precompiled_programs.emplace(usage, std::move(shader));
// TODO(Rodrigo): Is there a better way to do this?
precompiled_variants[usage.unique_identifier].push_back(
precompiled_programs.find(usage));
runtime_cache.emplace(entry.unique_identifier, std::move(shader));
}
};
const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)};
const std::size_t bucket_size{shader_usages.size() / num_workers};
const std::size_t bucket_size{transferable->size() / num_workers};
std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
std::vector<std::thread> threads(num_workers);
for (std::size_t i = 0; i < num_workers; ++i) {
const bool is_last_worker = i + 1 == num_workers;
const std::size_t start{bucket_size * i};
const std::size_t end{is_last_worker ? shader_usages.size() : start + bucket_size};
const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size};
// On some platforms the shared context has to be created from the GUI thread
contexts[i] = emu_window.CreateSharedContext();
threads[i] = std::thread(Worker, contexts[i].get(), start, end, shader_usages, dumps);
threads[i] = std::thread(worker, contexts[i].get(), start, end);
}
for (auto& thread : threads) {
thread.join();
}
if (compilation_failed) {
if (gl_cache_failed) {
// Invalidate the precompiled cache if a shader dumped shader was rejected
disk_cache.InvalidatePrecompiled();
precompiled_cache_altered = true;
@@ -533,11 +401,12 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
// TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
// before precompiling them
for (std::size_t i = 0; i < shader_usages.size(); ++i) {
const auto& usage{shader_usages[i]};
if (dumps.find(usage) == dumps.end()) {
const auto& program{precompiled_programs.at(usage)};
disk_cache.SaveDump(usage, program->handle);
for (std::size_t i = 0; i < transferable->size(); ++i) {
const u64 id = (*transferable)[i].unique_identifier;
const auto it = find_precompiled(id);
if (it == gl_cache.end()) {
const GLuint program = runtime_cache.at(id).program->handle;
disk_cache.SavePrecompiled(id, program);
precompiled_cache_altered = true;
}
}
@@ -547,80 +416,29 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
}
}
const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const {
const auto it = precompiled_variants.find(unique_identifier);
return it == precompiled_variants.end() ? nullptr : &it->second;
}
CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) {
if (supported_formats.find(dump.binary_format) == supported_formats.end()) {
LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram(
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
const std::unordered_set<GLenum>& supported_formats) {
if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) {
LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing");
return {};
}
CachedProgram shader = std::make_shared<OGLProgram>();
shader->handle = glCreateProgram();
glProgramParameteri(shader->handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
glProgramBinary(shader->handle, dump.binary_format, dump.binary.data(),
static_cast<GLsizei>(dump.binary.size()));
auto program = std::make_shared<OGLProgram>();
program->handle = glCreateProgram();
glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
glProgramBinary(program->handle, precompiled_entry.binary_format,
precompiled_entry.binary.data(),
static_cast<GLsizei>(precompiled_entry.binary.size()));
GLint link_status{};
glGetProgramiv(shader->handle, GL_LINK_STATUS, &link_status);
GLint link_status;
glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status);
if (link_status == GL_FALSE) {
LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing");
LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
return {};
}
return shader;
}
bool ShaderCacheOpenGL::GenerateUnspecializedShaders(
const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
const std::vector<ShaderDiskCacheRaw>& raws) {
if (callback) {
callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
}
for (std::size_t i = 0; i < raws.size(); ++i) {
if (stop_loading) {
return false;
}
const auto& raw{raws[i]};
const u64 unique_identifier{raw.GetUniqueIdentifier()};
const u64 calculated_hash{
GetUniqueIdentifier(raw.GetType(), raw.HasProgramA(), raw.GetCode(), raw.GetCodeB())};
if (unique_identifier != calculated_hash) {
LOG_ERROR(Render_OpenGL,
"Invalid hash in entry={:016x} (obtained hash={:016x}) - "
"removing shader cache",
raw.GetUniqueIdentifier(), calculated_hash);
disk_cache.InvalidateTransferable();
return false;
}
const u32 main_offset =
raw.GetType() == ShaderType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
ConstBufferLocker locker(raw.GetType());
const ShaderIR ir(raw.GetCode(), main_offset, COMPILER_SETTINGS, locker);
// TODO(Rodrigo): Handle VertexA shaders
// std::optional<ShaderIR> ir_b;
// if (raw.HasProgramA()) {
// ir_b.emplace(raw.GetProgramCodeB(), main_offset);
// }
UnspecializedShader unspecialized;
unspecialized.entries = GLShader::GetEntries(ir);
unspecialized.type = raw.GetType();
unspecialized.code = raw.GetCode();
unspecialized.code_b = raw.GetCodeB();
unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized);
if (callback) {
callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size());
}
}
return true;
return program;
}
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
@@ -648,17 +466,17 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const auto unique_identifier = GetUniqueIdentifier(
GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
const ShaderParameters params{system, disk_cache, precompiled_variants, device,
const ShaderParameters params{system, disk_cache, device,
cpu_addr, host_ptr, unique_identifier};
const auto found = unspecialized_shaders.find(unique_identifier);
if (found == unspecialized_shaders.end()) {
const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) {
shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
std::move(code_b));
} else {
shader = CachedShader::CreateFromCache(params, found->second);
const std::size_t size_in_bytes = code.size() * sizeof(u64);
shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
}
Register(shader);
@@ -673,19 +491,19 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
return kernel;
}
// No kernel found - create a new one
// No kernel found, create a new one
auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code, {})};
const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
const ShaderParameters params{system, disk_cache, precompiled_variants, device,
const ShaderParameters params{system, disk_cache, device,
cpu_addr, host_ptr, unique_identifier};
const auto found = unspecialized_shaders.find(unique_identifier);
if (found == unspecialized_shaders.end()) {
const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) {
kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
} else {
kernel = CachedShader::CreateFromCache(params, found->second);
const std::size_t size_in_bytes = code.size() * sizeof(u64);
kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
}
Register(kernel);

View File

@@ -22,7 +22,7 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/shader/const_buffer_locker.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace Core {
@@ -41,22 +41,17 @@ class RasterizerOpenGL;
struct UnspecializedShader;
using Shader = std::shared_ptr<CachedShader>;
using CachedProgram = std::shared_ptr<OGLProgram>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>;
using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>;
struct UnspecializedShader {
GLShader::ShaderEntries entries;
Tegra::Engines::ShaderType type;
ProgramCode code;
ProgramCode code_b;
struct PrecompiledShader {
std::shared_ptr<OGLProgram> program;
std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries;
};
struct ShaderParameters {
Core::System& system;
ShaderDiskCacheOpenGL& disk_cache;
const PrecompiledVariants* precompiled_variants;
const Device& device;
VAddr cpu_addr;
u8* host_ptr;
@@ -65,61 +60,45 @@ struct ShaderParameters {
class CachedShader final : public RasterizerCacheObject {
public:
~CachedShader();
/// Gets the GL program handle for the shader
GLuint GetHandle() const;
/// Returns the guest CPU address of the shader
VAddr GetCpuAddr() const override {
return cpu_addr;
}
/// Returns the size in bytes of the shader
std::size_t GetSizeInBytes() const override {
return size_in_bytes;
}
/// Gets the shader entries for the shader
const ShaderEntries& GetEntries() const {
return entries;
}
static Shader CreateStageFromMemory(const ShaderParameters& params,
Maxwell::ShaderProgram program_type,
ProgramCode program_code, ProgramCode program_code_b);
static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
static Shader CreateFromCache(const ShaderParameters& params,
const UnspecializedShader& unspecialized);
VAddr GetCpuAddr() const override {
return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
return code.size() * sizeof(u64);
}
/// Gets the shader entries for the shader
const GLShader::ShaderEntries& GetShaderEntries() const {
return entries;
}
/// Gets the GL program handle for the shader
GLuint GetHandle(const ProgramVariant& variant);
const PrecompiledShader& precompiled_shader,
std::size_t size_in_bytes);
private:
struct LockerVariant {
std::unique_ptr<VideoCommon::Shader::ConstBufferLocker> locker;
std::unordered_map<ProgramVariant, CachedProgram> programs;
};
explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
std::shared_ptr<VideoCommon::Shader::Registry> registry,
ShaderEntries entries, std::shared_ptr<OGLProgram> program);
explicit CachedShader(const ShaderParameters& params, Tegra::Engines::ShaderType shader_type,
GLShader::ShaderEntries entries, ProgramCode program_code,
ProgramCode program_code_b);
bool EnsureValidLockerVariant();
ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant,
const VideoCommon::Shader::ConstBufferLocker& locker) const;
Core::System& system;
ShaderDiskCacheOpenGL& disk_cache;
const Device& device;
VAddr cpu_addr{};
u64 unique_identifier{};
Tegra::Engines::ShaderType shader_type{};
GLShader::ShaderEntries entries;
ProgramCode code;
ProgramCode code_b;
LockerVariant* curr_locker_variant = nullptr;
std::vector<std::unique_ptr<LockerVariant>> locker_variants;
std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries;
VAddr cpu_addr = 0;
std::size_t size_in_bytes = 0;
std::shared_ptr<OGLProgram> program;
};
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
@@ -142,25 +121,15 @@ protected:
void FlushObjectInner(const Shader& object) override {}
private:
bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback,
const std::vector<ShaderDiskCacheRaw>& raws);
CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
const std::unordered_set<GLenum>& supported_formats);
const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const;
std::shared_ptr<OGLProgram> GeneratePrecompiledProgram(
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
const std::unordered_set<GLenum>& supported_formats);
Core::System& system;
Core::Frontend::EmuWindow& emu_window;
const Device& device;
ShaderDiskCacheOpenGL disk_cache;
PrecompiledPrograms precompiled_programs;
std::unordered_map<u64, PrecompiledVariants> precompiled_variants;
std::unordered_map<u64, UnspecializedShader> unspecialized_shaders;
std::unordered_map<u64, PrecompiledShader> runtime_cache;
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
};

View File

@@ -23,8 +23,9 @@
#include "video_core/shader/ast.h"
#include "video_core/shader/node.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/shader/transform_feedback.h"
namespace OpenGL::GLShader {
namespace OpenGL {
namespace {
@@ -36,6 +37,8 @@ using Tegra::Shader::IpaInterpMode;
using Tegra::Shader::IpaMode;
using Tegra::Shader::IpaSampleMode;
using Tegra::Shader::Register;
using VideoCommon::Shader::BuildTransformFeedback;
using VideoCommon::Shader::Registry;
using namespace std::string_literals;
using namespace VideoCommon::Shader;
@@ -48,6 +51,11 @@ class ExprDecompiler;
enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"};
constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr";
constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr";
struct TextureOffset {};
struct TextureDerivates {};
using TextureArgument = std::pair<Type, Node>;
@@ -56,6 +64,25 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
#define ftou floatBitsToUint
#define itof intBitsToFloat
#define utof uintBitsToFloat
bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
bvec2 is_nan1 = isnan(pair1);
bvec2 is_nan2 = isnan(pair2);
return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
}}
const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
layout (std140, binding = {}) uniform vs_config {{
float y_direction;
}};
)";
class ShaderWriter final {
public:
void AddExpression(std::string_view text) {
@@ -269,12 +296,41 @@ const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
}
}
/// Describes primitive behavior on geometry shaders
std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) {
switch (topology) {
case Maxwell::PrimitiveTopology::Points:
return {"points", 1};
case Maxwell::PrimitiveTopology::Lines:
case Maxwell::PrimitiveTopology::LineStrip:
return {"lines", 2};
case Maxwell::PrimitiveTopology::LinesAdjacency:
case Maxwell::PrimitiveTopology::LineStripAdjacency:
return {"lines_adjacency", 4};
case Maxwell::PrimitiveTopology::Triangles:
case Maxwell::PrimitiveTopology::TriangleStrip:
case Maxwell::PrimitiveTopology::TriangleFan:
return {"triangles", 3};
case Maxwell::PrimitiveTopology::TrianglesAdjacency:
case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
return {"triangles_adjacency", 6};
default:
UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
return {"points", 1};
}
}
/// Generates code to use for a swizzle operation.
constexpr const char* GetSwizzle(u32 element) {
constexpr const char* GetSwizzle(std::size_t element) {
constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
return swizzle.at(element);
}
constexpr const char* GetColorSwizzle(std::size_t element) {
constexpr std::array swizzle = {".r", ".g", ".b", ".a"};
return swizzle.at(element);
}
/// Translate topology
std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
switch (topology) {
@@ -341,11 +397,66 @@ std::string FlowStackTopName(MetaStackClass stack) {
return stage == ShaderType::Vertex;
}
struct GenericVaryingDescription {
std::string name;
u8 first_element = 0;
bool is_scalar = false;
};
class GLSLDecompiler final {
public:
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage,
std::string suffix)
: device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
ShaderType stage, std::string_view identifier, std::string_view suffix)
: device{device}, ir{ir}, registry{registry}, stage{stage},
identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {
if (stage != ShaderType::Compute) {
transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
}
}
void Decompile() {
DeclareHeader();
DeclareVertex();
DeclareGeometry();
DeclareFragment();
DeclareCompute();
DeclareInputAttributes();
DeclareOutputAttributes();
DeclareImages();
DeclareSamplers();
DeclareGlobalMemory();
DeclareConstantBuffers();
DeclareLocalMemory();
DeclareRegisters();
DeclarePredicates();
DeclareInternalFlags();
DeclareCustomVariables();
DeclarePhysicalAttributeReader();
code.AddLine("void main() {{");
++code.scope;
if (stage == ShaderType::Vertex) {
code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
}
if (ir.IsDecompiled()) {
DecompileAST();
} else {
DecompileBranchMode();
}
--code.scope;
code.AddLine("}}");
}
std::string GetResult() {
return code.GetResult();
}
private:
friend class ASTDecompiler;
friend class ExprDecompiler;
void DecompileBranchMode() {
// VM's program counter
@@ -387,43 +498,36 @@ public:
void DecompileAST();
void Decompile() {
DeclareVertex();
DeclareGeometry();
DeclareRegisters();
DeclareCustomVariables();
DeclarePredicates();
DeclareLocalMemory();
DeclareInternalFlags();
DeclareInputAttributes();
DeclareOutputAttributes();
DeclareConstantBuffers();
DeclareGlobalMemory();
DeclareSamplers();
DeclareImages();
DeclarePhysicalAttributeReader();
code.AddLine("void execute_{}() {{", suffix);
++code.scope;
if (ir.IsDecompiled()) {
DecompileAST();
} else {
DecompileBranchMode();
void DeclareHeader() {
if (!identifier.empty()) {
code.AddLine("// {}", identifier);
}
code.AddLine("#version 440 core");
code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
if (device.HasShaderBallot()) {
code.AddLine("#extension GL_ARB_shader_ballot : require");
}
if (device.HasVertexViewportLayer()) {
code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require");
}
if (device.HasImageLoadFormatted()) {
code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
}
if (device.HasWarpIntrinsics()) {
code.AddLine("#extension GL_NV_gpu_shader5 : require");
code.AddLine("#extension GL_NV_shader_thread_group : require");
code.AddLine("#extension GL_NV_shader_thread_shuffle : require");
}
// This pragma stops Nvidia's driver from over optimizing math (probably using fp16
// operations) on places where we don't want to.
// Thanks to Ryujinx for finding this workaround.
code.AddLine("#pragma optionNV(fastmath off)");
--code.scope;
code.AddLine("}}");
code.AddNewLine();
code.AddLine(CommonDeclarations, EmulationUniformBlockBinding);
}
std::string GetResult() {
return code.GetResult();
}
private:
friend class ASTDecompiler;
friend class ExprDecompiler;
void DeclareVertex() {
if (!IsVertexShader(stage))
return;
@@ -436,9 +540,15 @@ private:
return;
}
const auto& info = registry.GetGraphicsInfo();
const auto input_topology = info.primitive_topology;
const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology);
max_input_vertices = max_vertices;
code.AddLine("layout ({}) in;", glsl_topology);
const auto topology = GetTopologyName(header.common3.output_topology);
const auto max_vertices = header.common4.max_output_vertices.Value();
code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices);
const auto max_output_vertices = header.common4.max_output_vertices.Value();
code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices);
code.AddNewLine();
code.AddLine("in gl_PerVertex {{");
@@ -450,11 +560,40 @@ private:
DeclareVertexRedeclarations();
}
void DeclareFragment() {
if (stage != ShaderType::Fragment) {
return;
}
for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt);
}
}
void DeclareCompute() {
if (stage != ShaderType::Compute) {
return;
}
const auto& info = registry.GetComputeInfo();
if (const u32 size = info.shared_memory_size_in_words; size > 0) {
code.AddLine("shared uint smem[{}];", size);
code.AddNewLine();
}
code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]);
code.AddNewLine();
}
void DeclareVertexRedeclarations() {
code.AddLine("out gl_PerVertex {{");
++code.scope;
code.AddLine("vec4 gl_Position;");
auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position);
if (!pos_xfb.empty()) {
pos_xfb = fmt::format("layout ({}) ", pos_xfb);
}
const char* pos_type =
FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1);
code.AddLine("{}{} gl_Position;", pos_xfb, pos_type);
for (const auto attribute : ir.GetOutputAttributes()) {
if (attribute == Attribute::Index::ClipDistances0123 ||
@@ -525,18 +664,16 @@ private:
}
void DeclareLocalMemory() {
u64 local_memory_size = 0;
if (stage == ShaderType::Compute) {
code.AddLine("#ifdef LOCAL_MEMORY_SIZE");
code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory());
code.AddLine("#endif");
return;
local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
} else {
local_memory_size = header.GetLocalMemorySize();
}
const u64 local_memory_size = header.GetLocalMemorySize();
if (local_memory_size == 0) {
return;
}
const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4;
code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
code.AddNewLine();
}
@@ -589,7 +726,7 @@ private:
void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
const u32 location{GetGenericAttributeIndex(index)};
std::string name{GetInputAttribute(index)};
std::string name{GetGenericInputAttribute(index)};
if (stage == ShaderType::Geometry) {
name = "gs_" + name + "[]";
}
@@ -626,9 +763,59 @@ private:
}
}
std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const {
const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
const auto it = transform_feedback.find(location);
if (it == transform_feedback.end()) {
return {};
}
return it->second.components;
}
std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const {
const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
const auto it = transform_feedback.find(location);
if (it == transform_feedback.end()) {
return {};
}
const VaryingTFB& tfb = it->second;
return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer,
tfb.offset, tfb.stride);
}
void DeclareOutputAttribute(Attribute::Index index) {
const u32 location{GetGenericAttributeIndex(index)};
code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index));
static constexpr std::string_view swizzle = "xyzw";
u8 element = 0;
while (element < 4) {
auto xfb = GetTransformFeedbackDecoration(index, element);
if (!xfb.empty()) {
xfb = fmt::format(", {}", xfb);
}
const std::size_t remainder = 4 - element;
const std::size_t num_components = GetNumComponents(index, element).value_or(remainder);
const char* const type = FLOAT_TYPES.at(num_components - 1);
const u32 location = GetGenericAttributeIndex(index);
GenericVaryingDescription description;
description.first_element = static_cast<u8>(element);
description.is_scalar = num_components == 1;
description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME);
if (element != 0 || num_components != 4) {
const std::string_view name_swizzle = swizzle.substr(element, num_components);
description.name = fmt::format("{}_{}", description.name, name_swizzle);
}
for (std::size_t i = 0; i < num_components; ++i) {
const u8 offset = static_cast<u8>(location * 4 + element + i);
varying_description.insert({offset, description});
}
code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element,
xfb, type, description.name);
element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
}
}
void DeclareConstantBuffers() {
@@ -925,7 +1112,8 @@ private:
// TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
// set an 0x80000000 index for those and the shader fails to build. Find out why
// this happens and what's its intent.
return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint());
return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(),
max_input_vertices.value());
}
return std::string(name);
};
@@ -980,7 +1168,7 @@ private:
return {"0", Type::Int};
default:
if (IsGenericAttribute(attribute)) {
return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element),
return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element),
Type::Float};
}
break;
@@ -1049,8 +1237,7 @@ private:
return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}};
default:
if (IsGenericAttribute(attribute)) {
return {
{GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}};
return {{GetGenericOutputAttribute(attribute, abuf->GetElement()), Type::Float}};
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
return {};
@@ -1822,16 +2009,19 @@ private:
expr += GetSampler(meta->sampler);
expr += ", ";
expr += constructors.at(operation.GetOperandsCount() - 1);
expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1);
expr += '(';
for (std::size_t i = 0; i < count; ++i) {
expr += VisitOperand(operation, i).AsInt();
const std::size_t next = i + 1;
if (next == count)
expr += ')';
else if (next < count)
if (i > 0) {
expr += ", ";
}
expr += VisitOperand(operation, i).AsInt();
}
if (meta->array) {
expr += ", ";
expr += Visit(meta->array).AsInt();
}
expr += ')';
if (meta->lod && !meta->sampler.IsBuffer()) {
expr += ", ";
@@ -1945,7 +2135,7 @@ private:
// TODO(Subv): Figure out how dual-source blending is configured in the Switch.
for (u32 component = 0; component < 4; ++component) {
if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
code.AddLine("FragColor{}[{}] = {};", render_target, component,
code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component),
SafeGetRegister(current_reg).AsFloat());
++current_reg;
}
@@ -2261,27 +2451,34 @@ private:
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
std::string GetRegister(u32 index) const {
return GetDeclarationWithSuffix(index, "gpr");
return AppendSuffix(index, "gpr");
}
std::string GetCustomVariable(u32 index) const {
return GetDeclarationWithSuffix(index, "custom_var");
return AppendSuffix(index, "custom_var");
}
std::string GetPredicate(Tegra::Shader::Pred pred) const {
return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred");
return AppendSuffix(static_cast<u32>(pred), "pred");
}
std::string GetInputAttribute(Attribute::Index attribute) const {
return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr");
std::string GetGenericInputAttribute(Attribute::Index attribute) const {
return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME);
}
std::string GetOutputAttribute(Attribute::Index attribute) const {
return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr");
std::unordered_map<u8, GenericVaryingDescription> varying_description;
std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const {
const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element);
const auto& description = varying_description.at(offset);
if (description.is_scalar) {
return description.name;
}
return fmt::format("{}[{}]", description.name, element - description.first_element);
}
std::string GetConstBuffer(u32 index) const {
return GetDeclarationWithSuffix(index, "cbuf");
return AppendSuffix(index, "cbuf");
}
std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
@@ -2294,11 +2491,15 @@ private:
}
std::string GetConstBufferBlock(u32 index) const {
return GetDeclarationWithSuffix(index, "cbuf_block");
return AppendSuffix(index, "cbuf_block");
}
std::string GetLocalMemory() const {
return "lmem_" + suffix;
if (suffix.empty()) {
return "lmem";
} else {
return "lmem_" + std::string{suffix};
}
}
std::string GetInternalFlag(InternalFlag flag) const {
@@ -2307,19 +2508,27 @@ private:
const auto index = static_cast<u32>(flag);
ASSERT(index < static_cast<u32>(InternalFlag::Amount));
return fmt::format("{}_{}", InternalFlagNames[index], suffix);
if (suffix.empty()) {
return InternalFlagNames[index];
} else {
return fmt::format("{}_{}", InternalFlagNames[index], suffix);
}
}
std::string GetSampler(const Sampler& sampler) const {
return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
return AppendSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
}
std::string GetImage(const Image& image) const {
return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image");
return AppendSuffix(static_cast<u32>(image.GetIndex()), "image");
}
std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const {
return fmt::format("{}_{}_{}", name, index, suffix);
std::string AppendSuffix(u32 index, std::string_view name) const {
if (suffix.empty()) {
return fmt::format("{}{}", name, index);
} else {
return fmt::format("{}{}_{}", name, index, suffix);
}
}
u32 GetNumPhysicalInputAttributes() const {
@@ -2334,17 +2543,31 @@ private:
return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
}
bool IsRenderTargetEnabled(u32 render_target) const {
for (u32 component = 0; component < 4; ++component) {
if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
return true;
}
}
return false;
}
const Device& device;
const ShaderIR& ir;
const Registry& registry;
const ShaderType stage;
const std::string suffix;
const std::string_view identifier;
const std::string_view suffix;
const Header header;
std::unordered_map<u8, VaryingTFB> transform_feedback;
ShaderWriter code;
std::optional<u32> max_input_vertices;
};
std::string GetFlowVariable(u32 i) {
return fmt::format("flow_var_{}", i);
std::string GetFlowVariable(u32 index) {
return fmt::format("flow_var{}", index);
}
class ExprDecompiler {
@@ -2531,7 +2754,7 @@ void GLSLDecompiler::DecompileAST() {
} // Anonymous namespace
ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
ShaderEntries entries;
for (const auto& cbuf : ir.GetConstantBuffers()) {
entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
@@ -2555,28 +2778,12 @@ ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
return entries;
}
std::string GetCommonDeclarations() {
return R"(#define ftoi floatBitsToInt
#define ftou floatBitsToUint
#define itof intBitsToFloat
#define utof uintBitsToFloat
bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {
bvec2 is_nan1 = isnan(pair1);
bvec2 is_nan2 = isnan(pair2);
return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
}
const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
)";
}
std::string Decompile(const Device& device, const ShaderIR& ir, ShaderType stage,
const std::string& suffix) {
GLSLDecompiler decompiler(device, ir, stage, suffix);
std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry,
ShaderType stage, std::string_view identifier,
std::string_view suffix) {
GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix);
decompiler.Decompile();
return decompiler.GetResult();
}
} // namespace OpenGL::GLShader
} // namespace OpenGL

View File

@@ -6,22 +6,18 @@
#include <array>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
class ShaderIR;
}
namespace OpenGL {
class Device;
}
namespace OpenGL::GLShader {
class Device;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using SamplerEntry = VideoCommon::Shader::Sampler;
@@ -78,11 +74,11 @@ struct ShaderEntries {
std::size_t shader_length{};
};
ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir);
ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir);
std::string GetCommonDeclarations();
std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
const VideoCommon::Shader::Registry& registry,
Tegra::Engines::ShaderType stage, std::string_view identifier,
std::string_view suffix = {});
std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
Tegra::Engines::ShaderType stage, const std::string& suffix);
} // namespace OpenGL::GLShader
} // namespace OpenGL

View File

@@ -31,32 +31,24 @@ namespace {
using ShaderCacheVersionHash = std::array<u8, 64>;
enum class TransferableEntryKind : u32 {
Raw,
Usage,
};
struct ConstBufferKey {
u32 cbuf{};
u32 offset{};
u32 value{};
u32 cbuf = 0;
u32 offset = 0;
u32 value = 0;
};
struct BoundSamplerKey {
u32 offset{};
Tegra::Engines::SamplerDescriptor sampler{};
u32 offset = 0;
Tegra::Engines::SamplerDescriptor sampler;
};
struct BindlessSamplerKey {
u32 cbuf{};
u32 offset{};
Tegra::Engines::SamplerDescriptor sampler{};
u32 cbuf = 0;
u32 offset = 0;
Tegra::Engines::SamplerDescriptor sampler;
};
constexpr u32 NativeVersion = 12;
// Making sure sizes doesn't change by accident
static_assert(sizeof(ProgramVariant) == 20);
constexpr u32 NativeVersion = 20;
ShaderCacheVersionHash GetShaderCacheVersionHash() {
ShaderCacheVersionHash hash{};
@@ -67,61 +59,124 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
} // Anonymous namespace
ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ShaderType type, ProgramCode code,
ProgramCode code_b)
: unique_identifier{unique_identifier}, type{type}, code{std::move(code)}, code_b{std::move(
code_b)} {}
ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default;
ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default;
bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) ||
file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
if (file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
return false;
}
u32 code_size{};
u32 code_size_b{};
u32 code_size;
u32 code_size_b;
if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) {
return false;
}
code.resize(code_size);
code_b.resize(code_size_b);
if (file.ReadArray(code.data(), code_size) != code_size)
if (file.ReadArray(code.data(), code_size) != code_size) {
return false;
}
if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) {
return false;
}
u8 is_texture_handler_size_known;
u32 texture_handler_size_value;
u32 num_keys;
u32 num_bound_samplers;
u32 num_bindless_samplers;
if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
file.ReadArray(&texture_handler_size_value, 1) != 1 ||
file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
file.ReadArray(&num_bindless_samplers, 1) != 1) {
return false;
}
if (is_texture_handler_size_known) {
texture_handler_size = texture_handler_size_value;
}
std::vector<ConstBufferKey> flat_keys(num_keys);
std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers);
std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers);
if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
flat_bound_samplers.size() ||
file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
flat_bindless_samplers.size()) {
return false;
}
for (const auto& key : flat_keys) {
keys.insert({{key.cbuf, key.offset}, key.value});
}
for (const auto& key : flat_bound_samplers) {
bound_samplers.emplace(key.offset, key.sampler);
}
for (const auto& key : flat_bindless_samplers) {
bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
}
return true;
}
bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(static_cast<u32>(type)) != 1 ||
bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
if (file.WriteObject(static_cast<u32>(type)) != 1 ||
file.WriteObject(static_cast<u32>(code.size())) != 1 ||
file.WriteObject(static_cast<u32>(code_b.size())) != 1) {
return false;
}
if (file.WriteArray(code.data(), code.size()) != code.size())
if (file.WriteArray(code.data(), code.size()) != code.size()) {
return false;
}
if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) {
return false;
}
return true;
if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 ||
file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) != 1 ||
file.WriteObject(texture_handler_size.value_or(0)) != 1 ||
file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
return false;
}
std::vector<ConstBufferKey> flat_keys;
flat_keys.reserve(keys.size());
for (const auto& [address, value] : keys) {
flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
}
std::vector<BoundSamplerKey> flat_bound_samplers;
flat_bound_samplers.reserve(bound_samplers.size());
for (const auto& [address, sampler] : bound_samplers) {
flat_bound_samplers.push_back(BoundSamplerKey{address, sampler});
}
std::vector<BindlessSamplerKey> flat_bindless_samplers;
flat_bindless_samplers.reserve(bindless_samplers.size());
for (const auto& [address, sampler] : bindless_samplers) {
flat_bindless_samplers.push_back(
BindlessSamplerKey{address.first, address.second, sampler});
}
return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
flat_bound_samplers.size() &&
file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
flat_bindless_samplers.size();
}
ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
ShaderDiskCacheOpenGL::LoadTransferable() {
std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
// Skip games without title id
const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
if (!Settings::values.use_disk_shader_cache || !has_title_id) {
@@ -130,17 +185,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
FileUtil::IOFile file(GetTransferablePath(), "rb");
if (!file.IsOpen()) {
LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}",
GetTitleID());
LOG_INFO(Render_OpenGL, "No transferable shader cache found");
is_usable = true;
return {};
}
u32 version{};
if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
LOG_ERROR(Render_OpenGL,
"Failed to get transferable cache version for title id={}, skipping",
GetTitleID());
LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
return {};
}
@@ -158,105 +210,42 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
}
// Version is valid, load the shaders
constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping";
std::vector<ShaderDiskCacheRaw> raws;
std::vector<ShaderDiskCacheUsage> usages;
std::vector<ShaderDiskCacheEntry> entries;
while (file.Tell() < file.GetSize()) {
TransferableEntryKind kind{};
if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping");
return {};
}
switch (kind) {
case TransferableEntryKind::Raw: {
ShaderDiskCacheRaw entry;
if (!entry.Load(file)) {
LOG_ERROR(Render_OpenGL, error_loading);
return {};
}
transferable.insert({entry.GetUniqueIdentifier(), {}});
raws.push_back(std::move(entry));
break;
}
case TransferableEntryKind::Usage: {
ShaderDiskCacheUsage usage;
u32 num_keys{};
u32 num_bound_samplers{};
u32 num_bindless_samplers{};
if (file.ReadArray(&usage.unique_identifier, 1) != 1 ||
file.ReadArray(&usage.variant, 1) != 1 ||
file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
file.ReadArray(&num_bound_samplers, 1) != 1 ||
file.ReadArray(&num_bindless_samplers, 1) != 1) {
LOG_ERROR(Render_OpenGL, error_loading);
return {};
}
std::vector<ConstBufferKey> keys(num_keys);
std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
if (file.ReadArray(keys.data(), keys.size()) != keys.size() ||
file.ReadArray(bound_samplers.data(), bound_samplers.size()) !=
bound_samplers.size() ||
file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) !=
bindless_samplers.size()) {
LOG_ERROR(Render_OpenGL, error_loading);
return {};
}
for (const auto& key : keys) {
usage.keys.insert({{key.cbuf, key.offset}, key.value});
}
for (const auto& key : bound_samplers) {
usage.bound_samplers.emplace(key.offset, key.sampler);
}
for (const auto& key : bindless_samplers) {
usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
}
usages.push_back(std::move(usage));
break;
}
default:
LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping",
static_cast<u32>(kind));
ShaderDiskCacheEntry& entry = entries.emplace_back();
if (!entry.Load(file)) {
LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
return {};
}
}
is_usable = true;
return {{std::move(raws), std::move(usages)}};
return {std::move(entries)};
}
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>
ShaderDiskCacheOpenGL::LoadPrecompiled() {
std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() {
if (!is_usable) {
return {};
}
std::string path = GetPrecompiledPath();
FileUtil::IOFile file(path, "rb");
FileUtil::IOFile file(GetPrecompiledPath(), "rb");
if (!file.IsOpen()) {
LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}",
GetTitleID());
LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
return {};
}
const auto result = LoadPrecompiledFile(file);
if (!result) {
LOG_INFO(Render_OpenGL,
"Failed to load precompiled cache for game with title id={}, removing",
GetTitleID());
file.Close();
InvalidatePrecompiled();
return {};
if (const auto result = LoadPrecompiledFile(file)) {
return *result;
}
return *result;
LOG_INFO(Render_OpenGL, "Failed to load precompiled cache");
file.Close();
InvalidatePrecompiled();
return {};
}
std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
FileUtil::IOFile& file) {
// Read compressed file from disk and decompress to virtual precompiled cache file
std::vector<u8> compressed(file.GetSize());
file.ReadBytes(compressed.data(), compressed.size());
@@ -275,58 +264,22 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
return {};
}
ShaderDumpsMap dumps;
std::vector<ShaderDiskCachePrecompiled> entries;
while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
u32 num_keys{};
u32 num_bound_samplers{};
u32 num_bindless_samplers{};
ShaderDiskCacheUsage usage;
if (!LoadObjectFromPrecompiled(usage.unique_identifier) ||
!LoadObjectFromPrecompiled(usage.variant) ||
!LoadObjectFromPrecompiled(usage.bound_buffer) ||
!LoadObjectFromPrecompiled(num_keys) ||
!LoadObjectFromPrecompiled(num_bound_samplers) ||
!LoadObjectFromPrecompiled(num_bindless_samplers)) {
return {};
}
std::vector<ConstBufferKey> keys(num_keys);
std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) ||
!LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) !=
bound_samplers.size() ||
!LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size()) !=
bindless_samplers.size()) {
return {};
}
for (const auto& key : keys) {
usage.keys.insert({{key.cbuf, key.offset}, key.value});
}
for (const auto& key : bound_samplers) {
usage.bound_samplers.emplace(key.offset, key.sampler);
}
for (const auto& key : bindless_samplers) {
usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
}
ShaderDiskCacheDump dump;
if (!LoadObjectFromPrecompiled(dump.binary_format)) {
u32 binary_size;
auto& entry = entries.emplace_back();
if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
!LoadObjectFromPrecompiled(entry.binary_format) ||
!LoadObjectFromPrecompiled(binary_size)) {
return {};
}
u32 binary_length{};
if (!LoadObjectFromPrecompiled(binary_length)) {
entry.binary.resize(binary_size);
if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
return {};
}
dump.binary.resize(binary_length);
if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
return {};
}
dumps.emplace(std::move(usage), dump);
}
return dumps;
return entries;
}
void ShaderDiskCacheOpenGL::InvalidateTransferable() {
@@ -346,13 +299,13 @@ void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
}
}
void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
if (!is_usable) {
return;
}
const u64 id = entry.GetUniqueIdentifier();
if (transferable.find(id) != transferable.end()) {
const u64 id = entry.unique_identifier;
if (stored_transferable.find(id) != stored_transferable.end()) {
// The shader already exists
return;
}
@@ -361,71 +314,17 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
if (!file.IsOpen()) {
return;
}
if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) {
if (!entry.Save(file)) {
LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
file.Close();
InvalidateTransferable();
return;
}
transferable.insert({id, {}});
stored_transferable.insert(id);
}
void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
if (!is_usable) {
return;
}
const auto it = transferable.find(usage.unique_identifier);
ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
auto& usages{it->second};
if (usages.find(usage) != usages.end()) {
// Skip this variant since the shader is already stored.
return;
}
usages.insert(usage);
FileUtil::IOFile file = AppendTransferableFile();
if (!file.IsOpen())
return;
const auto Close = [&] {
LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing");
file.Close();
InvalidateTransferable();
};
if (file.WriteObject(TransferableEntryKind::Usage) != 1 ||
file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 ||
file.WriteObject(usage.bound_buffer) != 1 ||
file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 ||
file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 ||
file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) {
Close();
return;
}
for (const auto& [pair, value] : usage.keys) {
const auto [cbuf, offset] = pair;
if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) {
Close();
return;
}
}
for (const auto& [offset, sampler] : usage.bound_samplers) {
if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) {
Close();
return;
}
}
for (const auto& [pair, sampler] : usage.bindless_samplers) {
const auto [cbuf, offset] = pair;
if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
Close();
return;
}
}
}
void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) {
void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) {
if (!is_usable) {
return;
}
@@ -437,51 +336,19 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
SavePrecompiledHeaderToVirtualPrecompiledCache();
}
GLint binary_length{};
GLint binary_length;
glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
GLenum binary_format{};
GLenum binary_format;
std::vector<u8> binary(binary_length);
glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
const auto Close = [&] {
LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
usage.unique_identifier);
InvalidatePrecompiled();
};
if (!SaveObjectToPrecompiled(usage.unique_identifier) ||
!SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) ||
!SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) ||
!SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) ||
!SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) {
Close();
return;
}
for (const auto& [pair, value] : usage.keys) {
const auto [cbuf, offset] = pair;
if (SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value}) != 1) {
Close();
return;
}
}
for (const auto& [offset, sampler] : usage.bound_samplers) {
if (SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler}) != 1) {
Close();
return;
}
}
for (const auto& [pair, sampler] : usage.bindless_samplers) {
const auto [cbuf, offset] = pair;
if (SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
Close();
return;
}
}
if (!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
!SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) ||
!SaveObjectToPrecompiled(static_cast<u32>(binary.size())) ||
!SaveArrayToPrecompiled(binary.data(), binary.size())) {
Close();
LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
unique_identifier);
InvalidatePrecompiled();
}
}
@@ -534,7 +401,6 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) {
LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
precompiled_path);
return;
}
}

View File

@@ -19,8 +19,7 @@
#include "common/common_types.h"
#include "core/file_sys/vfs_vector.h"
#include "video_core/engines/shader_type.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/shader/const_buffer_locker.h"
#include "video_core/shader/registry.h"
namespace Core {
class System;
@@ -32,139 +31,39 @@ class IOFile;
namespace OpenGL {
struct ShaderDiskCacheUsage;
struct ShaderDiskCacheDump;
using ProgramCode = std::vector<u64>;
using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
/// Describes the different variants a program can be compiled with.
struct ProgramVariant final {
ProgramVariant() = default;
/// Graphics constructor.
explicit constexpr ProgramVariant(GLenum primitive_mode) noexcept
: primitive_mode{primitive_mode} {}
/// Compute constructor.
explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size,
u32 local_memory_size) noexcept
: block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)},
shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {}
// Graphics specific parameters.
GLenum primitive_mode{};
// Compute specific parameters.
u32 block_x{};
u16 block_y{};
u16 block_z{};
u32 shared_memory_size{};
u32 local_memory_size{};
bool operator==(const ProgramVariant& rhs) const noexcept {
return std::tie(primitive_mode, block_x, block_y, block_z, shared_memory_size,
local_memory_size) == std::tie(rhs.primitive_mode, rhs.block_x, rhs.block_y,
rhs.block_z, rhs.shared_memory_size,
rhs.local_memory_size);
}
bool operator!=(const ProgramVariant& rhs) const noexcept {
return !operator==(rhs);
}
};
static_assert(std::is_trivially_copyable_v<ProgramVariant>);
/// Describes how a shader is used.
struct ShaderDiskCacheUsage {
u64 unique_identifier{};
ProgramVariant variant;
u32 bound_buffer{};
VideoCommon::Shader::KeyMap keys;
VideoCommon::Shader::BoundSamplerMap bound_samplers;
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
bool operator==(const ShaderDiskCacheUsage& rhs) const {
return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) ==
std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers,
rhs.bindless_samplers);
}
bool operator!=(const ShaderDiskCacheUsage& rhs) const {
return !operator==(rhs);
}
};
} // namespace OpenGL
namespace std {
template <>
struct hash<OpenGL::ProgramVariant> {
std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept {
return (static_cast<std::size_t>(variant.primitive_mode) << 6) ^
static_cast<std::size_t>(variant.block_x) ^
(static_cast<std::size_t>(variant.block_y) << 32) ^
(static_cast<std::size_t>(variant.block_z) << 48) ^
(static_cast<std::size_t>(variant.shared_memory_size) << 16) ^
(static_cast<std::size_t>(variant.local_memory_size) << 36);
}
};
template <>
struct hash<OpenGL::ShaderDiskCacheUsage> {
std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
return static_cast<std::size_t>(usage.unique_identifier) ^
std::hash<OpenGL::ProgramVariant>{}(usage.variant);
}
};
} // namespace std
namespace OpenGL {
/// Describes a shader how it's used by the guest GPU
class ShaderDiskCacheRaw {
public:
explicit ShaderDiskCacheRaw(u64 unique_identifier, Tegra::Engines::ShaderType type,
ProgramCode code, ProgramCode code_b = {});
ShaderDiskCacheRaw();
~ShaderDiskCacheRaw();
/// Describes a shader and how it's used by the guest GPU
struct ShaderDiskCacheEntry {
ShaderDiskCacheEntry();
~ShaderDiskCacheEntry();
bool Load(FileUtil::IOFile& file);
bool Save(FileUtil::IOFile& file) const;
u64 GetUniqueIdentifier() const {
return unique_identifier;
}
bool HasProgramA() const {
return !code.empty() && !code_b.empty();
}
Tegra::Engines::ShaderType GetType() const {
return type;
}
const ProgramCode& GetCode() const {
return code;
}
const ProgramCode& GetCodeB() const {
return code_b;
}
private:
u64 unique_identifier{};
Tegra::Engines::ShaderType type{};
ProgramCode code;
ProgramCode code_b;
u64 unique_identifier = 0;
std::optional<u32> texture_handler_size;
u32 bound_buffer = 0;
VideoCommon::Shader::GraphicsInfo graphics_info;
VideoCommon::Shader::ComputeInfo compute_info;
VideoCommon::Shader::KeyMap keys;
VideoCommon::Shader::BoundSamplerMap bound_samplers;
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
};
/// Contains an OpenGL dumped binary program
struct ShaderDiskCacheDump {
GLenum binary_format{};
struct ShaderDiskCachePrecompiled {
u64 unique_identifier = 0;
GLenum binary_format = 0;
std::vector<u8> binary;
};
@@ -174,11 +73,10 @@ public:
~ShaderDiskCacheOpenGL();
/// Loads transferable cache. If file has a old version or on failure, it deletes the file.
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
LoadTransferable();
std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
/// Loads current game's precompiled cache. Invalidates on failure.
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled();
std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled();
/// Removes the transferable (and precompiled) cache file.
void InvalidateTransferable();
@@ -187,21 +85,18 @@ public:
void InvalidatePrecompiled();
/// Saves a raw dump to the transferable file. Checks for collisions.
void SaveRaw(const ShaderDiskCacheRaw& entry);
/// Saves shader usage to the transferable file. Does not check for collisions.
void SaveUsage(const ShaderDiskCacheUsage& usage);
void SaveEntry(const ShaderDiskCacheEntry& entry);
/// Saves a dump entry to the precompiled file. Does not check for collisions.
void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);
void SavePrecompiled(u64 unique_identifier, GLuint program);
/// Serializes virtual precompiled shader cache file to real file
void SaveVirtualPrecompiledFile();
private:
/// Loads the transferable cache. Returns empty on failure.
std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
LoadPrecompiledFile(FileUtil::IOFile& file);
std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
FileUtil::IOFile& file);
/// Opens current game's transferable file and write it's header if it doesn't exist
FileUtil::IOFile AppendTransferableFile() const;
@@ -270,7 +165,7 @@ private:
std::size_t precompiled_cache_virtual_file_offset = 0;
// Stored transferable shaders
std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
std::unordered_set<u64> stored_transferable;
// The cache has been loaded at boot
bool is_usable{};

View File

@@ -1,109 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <string>
#include <fmt/format.h>
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/shader/shader_ir.h"
namespace OpenGL::GLShader {
using Tegra::Engines::Maxwell3D;
using Tegra::Engines::ShaderType;
using VideoCommon::Shader::CompileDepth;
using VideoCommon::Shader::CompilerSettings;
using VideoCommon::Shader::ProgramCode;
using VideoCommon::Shader::ShaderIR;
std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) {
std::string out = GetCommonDeclarations();
out += fmt::format(R"(
layout (std140, binding = {}) uniform vs_config {{
float y_direction;
}};
)",
EmulationUniformBlockBinding);
out += Decompile(device, ir, ShaderType::Vertex, "vertex");
if (ir_b) {
out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b");
}
out += R"(
void main() {
gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);
execute_vertex();
)";
if (ir_b) {
out += " execute_vertex_b();";
}
out += "}\n";
return out;
}
std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) {
std::string out = GetCommonDeclarations();
out += fmt::format(R"(
layout (std140, binding = {}) uniform gs_config {{
float y_direction;
}};
)",
EmulationUniformBlockBinding);
out += Decompile(device, ir, ShaderType::Geometry, "geometry");
out += R"(
void main() {
execute_geometry();
}
)";
return out;
}
std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) {
std::string out = GetCommonDeclarations();
out += fmt::format(R"(
layout (location = 0) out vec4 FragColor0;
layout (location = 1) out vec4 FragColor1;
layout (location = 2) out vec4 FragColor2;
layout (location = 3) out vec4 FragColor3;
layout (location = 4) out vec4 FragColor4;
layout (location = 5) out vec4 FragColor5;
layout (location = 6) out vec4 FragColor6;
layout (location = 7) out vec4 FragColor7;
layout (std140, binding = {}) uniform fs_config {{
float y_direction;
}};
)",
EmulationUniformBlockBinding);
out += Decompile(device, ir, ShaderType::Fragment, "fragment");
out += R"(
void main() {
execute_fragment();
}
)";
return out;
}
std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) {
std::string out = GetCommonDeclarations();
out += Decompile(device, ir, ShaderType::Compute, "compute");
out += R"(
void main() {
execute_compute();
}
)";
return out;
}
} // namespace OpenGL::GLShader

View File

@@ -1,34 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/shader/shader_ir.h"
namespace OpenGL {
class Device;
}
namespace OpenGL::GLShader {
using VideoCommon::Shader::ProgramCode;
using VideoCommon::Shader::ShaderIR;
/// Generates the GLSL vertex shader program source code for the given VS program
std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b);
/// Generates the GLSL geometry shader program source code for the given GS program
std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir);
/// Generates the GLSL fragment shader program source code for the given FS program
std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir);
/// Generates the GLSL compute shader program source code for the given CS program
std::string GenerateComputeShader(const Device& device, const ShaderIR& ir);
} // namespace OpenGL::GLShader

View File

@@ -94,6 +94,15 @@ void SetupDirtyShaders(Tables& tables) {
Shaders);
}
void SetupDirtyPolygonModes(Tables& tables) {
tables[0][OFF(polygon_mode_front)] = PolygonModeFront;
tables[0][OFF(polygon_mode_back)] = PolygonModeBack;
tables[1][OFF(polygon_mode_front)] = PolygonModes;
tables[1][OFF(polygon_mode_back)] = PolygonModes;
tables[0][OFF(fill_rectangle)] = PolygonModes;
}
void SetupDirtyDepthTest(Tables& tables) {
auto& table = tables[0];
table[OFF(depth_test_enable)] = DepthTest;
@@ -211,6 +220,7 @@ void StateTracker::Initialize() {
SetupDirtyVertexArrays(tables);
SetupDirtyVertexFormat(tables);
SetupDirtyShaders(tables);
SetupDirtyPolygonModes(tables);
SetupDirtyDepthTest(tables);
SetupDirtyStencilTest(tables);
SetupDirtyAlphaTest(tables);

View File

@@ -59,6 +59,10 @@ enum : u8 {
Shaders,
ClipDistances,
PolygonModes,
PolygonModeFront,
PolygonModeBack,
ColorMask,
FrontFace,
CullTest,
@@ -111,6 +115,13 @@ public:
flags[OpenGL::Dirty::VertexInstance0 + 1] = true;
}
void NotifyPolygonModes() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::PolygonModes] = true;
flags[OpenGL::Dirty::PolygonModeFront] = true;
flags[OpenGL::Dirty::PolygonModeBack] = true;
}
void NotifyViewport0() {
auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::Viewports] = true;

View File

@@ -53,6 +53,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F
{GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U
{GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false}, // RGBA16S
{GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI

View File

@@ -488,5 +488,18 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) {
return GL_COPY;
}
inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) {
switch (polygon_mode) {
case Maxwell::PolygonMode::Point:
return GL_POINT;
case Maxwell::PolygonMode::Line:
return GL_LINE;
case Maxwell::PolygonMode::Fill:
return GL_FILL;
}
UNREACHABLE_MSG("Invalid polygon mode={}", static_cast<int>(polygon_mode));
return GL_FILL;
}
} // namespace MaxwellToGL
} // namespace OpenGL

View File

@@ -5,8 +5,11 @@
#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
@@ -25,6 +28,8 @@
namespace OpenGL {
namespace {
// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have
// to wait on available presentation frames.
constexpr std::size_t SWAP_CHAIN_SIZE = 3;
@@ -41,124 +46,6 @@ struct Frame {
bool is_srgb{}; /// Framebuffer is sRGB or RGB
};
/**
* For smooth Vsync rendering, we want to always present the latest frame that the core generates,
* but also make sure that rendering happens at the pace that the frontend dictates. This is a
* helper class that the renderer uses to sync frames between the render thread and the presentation
* thread
*/
class FrameMailbox {
public:
std::mutex swap_chain_lock;
std::condition_variable present_cv;
std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
std::queue<Frame*> free_queue;
std::deque<Frame*> present_queue;
Frame* previous_frame{};
FrameMailbox() {
for (auto& frame : swap_chain) {
free_queue.push(&frame);
}
}
~FrameMailbox() {
// lock the mutex and clear out the present and free_queues and notify any people who are
// blocked to prevent deadlock on shutdown
std::scoped_lock lock{swap_chain_lock};
std::queue<Frame*>().swap(free_queue);
present_queue.clear();
present_cv.notify_all();
}
void ReloadPresentFrame(Frame* frame, u32 height, u32 width) {
frame->present.Release();
frame->present.Create();
GLint previous_draw_fbo{};
glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
frame->color.handle);
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
}
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
frame->color_reloaded = false;
}
void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
// Recreate the color texture attachment
frame->color.Release();
frame->color.Create();
const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8;
glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height);
// Recreate the FBO for the render target
frame->render.Release();
frame->render.Create();
glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle);
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
frame->color.handle);
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
}
frame->width = width;
frame->height = height;
frame->color_reloaded = true;
}
Frame* GetRenderFrame() {
std::unique_lock lock{swap_chain_lock};
// If theres no free frames, we will reuse the oldest render frame
if (free_queue.empty()) {
auto frame = present_queue.back();
present_queue.pop_back();
return frame;
}
Frame* frame = free_queue.front();
free_queue.pop();
return frame;
}
void ReleaseRenderFrame(Frame* frame) {
std::unique_lock lock{swap_chain_lock};
present_queue.push_front(frame);
present_cv.notify_one();
}
Frame* TryGetPresentFrame(int timeout_ms) {
std::unique_lock lock{swap_chain_lock};
// wait for new entries in the present_queue
present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
[&] { return !present_queue.empty(); });
if (present_queue.empty()) {
// timed out waiting for a frame to draw so return the previous frame
return previous_frame;
}
// free the previous frame and add it back to the free queue
if (previous_frame) {
free_queue.push(previous_frame);
}
// the newest entries are pushed to the front of the queue
Frame* frame = present_queue.front();
present_queue.pop_front();
// remove all old entries from the present queue and move them back to the free_queue
for (auto f : present_queue) {
free_queue.push(f);
}
present_queue.clear();
previous_frame = frame;
return frame;
}
};
namespace {
constexpr char VERTEX_SHADER[] = R"(
#version 430 core
@@ -211,6 +98,24 @@ struct ScreenRectVertex {
std::array<GLfloat, 2> tex_coord;
};
/// Returns true if any debug tool is attached
bool HasDebugTool() {
const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
if (nsight) {
return true;
}
GLint num_extensions;
glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions);
for (GLuint index = 0; index < static_cast<GLuint>(num_extensions); ++index) {
const auto name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, index));
if (!std::strcmp(name, "GL_EXT_debug_tool")) {
return true;
}
}
return false;
}
/**
* Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left
* corner and (width, height) on the lower-bottom.
@@ -294,6 +199,153 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
} // Anonymous namespace
/**
* For smooth Vsync rendering, we want to always present the latest frame that the core generates,
* but also make sure that rendering happens at the pace that the frontend dictates. This is a
* helper class that the renderer uses to sync frames between the render thread and the presentation
* thread
*/
class FrameMailbox {
public:
std::mutex swap_chain_lock;
std::condition_variable present_cv;
std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
std::queue<Frame*> free_queue;
std::deque<Frame*> present_queue;
Frame* previous_frame{};
FrameMailbox() : has_debug_tool{HasDebugTool()} {
for (auto& frame : swap_chain) {
free_queue.push(&frame);
}
}
~FrameMailbox() {
// lock the mutex and clear out the present and free_queues and notify any people who are
// blocked to prevent deadlock on shutdown
std::scoped_lock lock{swap_chain_lock};
std::queue<Frame*>().swap(free_queue);
present_queue.clear();
present_cv.notify_all();
}
void ReloadPresentFrame(Frame* frame, u32 height, u32 width) {
frame->present.Release();
frame->present.Create();
GLint previous_draw_fbo{};
glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
frame->color.handle);
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
}
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
frame->color_reloaded = false;
}
void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
// Recreate the color texture attachment
frame->color.Release();
frame->color.Create();
const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8;
glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height);
// Recreate the FBO for the render target
frame->render.Release();
frame->render.Create();
glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle);
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
frame->color.handle);
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
}
frame->width = width;
frame->height = height;
frame->color_reloaded = true;
}
Frame* GetRenderFrame() {
std::unique_lock lock{swap_chain_lock};
// If theres no free frames, we will reuse the oldest render frame
if (free_queue.empty()) {
auto frame = present_queue.back();
present_queue.pop_back();
return frame;
}
Frame* frame = free_queue.front();
free_queue.pop();
return frame;
}
void ReleaseRenderFrame(Frame* frame) {
std::unique_lock lock{swap_chain_lock};
present_queue.push_front(frame);
present_cv.notify_one();
DebugNotifyNextFrame();
}
Frame* TryGetPresentFrame(int timeout_ms) {
DebugWaitForNextFrame();
std::unique_lock lock{swap_chain_lock};
// wait for new entries in the present_queue
present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
[&] { return !present_queue.empty(); });
if (present_queue.empty()) {
// timed out waiting for a frame to draw so return the previous frame
return previous_frame;
}
// free the previous frame and add it back to the free queue
if (previous_frame) {
free_queue.push(previous_frame);
}
// the newest entries are pushed to the front of the queue
Frame* frame = present_queue.front();
present_queue.pop_front();
// remove all old entries from the present queue and move them back to the free_queue
for (auto f : present_queue) {
free_queue.push(f);
}
present_queue.clear();
previous_frame = frame;
return frame;
}
private:
std::mutex debug_synch_mutex;
std::condition_variable debug_synch_condition;
std::atomic_int frame_for_debug{};
const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step
/// Signal that a new frame is available (called from GPU thread)
void DebugNotifyNextFrame() {
if (!has_debug_tool) {
return;
}
frame_for_debug++;
std::lock_guard lock{debug_synch_mutex};
debug_synch_condition.notify_one();
}
/// Wait for a new frame to be available (called from presentation thread)
void DebugWaitForNextFrame() {
if (!has_debug_tool) {
return;
}
const int last_frame = frame_for_debug;
std::unique_lock lock{debug_synch_mutex};
debug_synch_condition.wait(lock,
[this, last_frame] { return frame_for_debug > last_frame; });
}
};
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system)
: VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system},
frame_mailbox{std::make_unique<FrameMailbox>()} {}
@@ -576,6 +628,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
// TODO: Signal state tracker about these changes
state_tracker.NotifyScreenDrawVertexArray();
state_tracker.NotifyPolygonModes();
state_tracker.NotifyViewport0();
state_tracker.NotifyScissor0();
state_tracker.NotifyColorMask0();
@@ -611,6 +664,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glDisable(GL_ALPHA_TEST);
glDisablei(GL_BLEND, 0);
glDisablei(GL_SCISSOR_TEST, 0);
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
glCullFace(GL_BACK);
glFrontFace(GL_CW);
glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);

View File

@@ -125,6 +125,7 @@ struct FormatTuple {
{vk::Format::eR8Uint, Attachable | Storage}, // R8UI
{vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F
{vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U
{vk::Format::eR16G16B16A16Snorm, Attachable | Storage}, // RGBA16S
{vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI
{vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F
{vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI
@@ -331,6 +332,8 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
return vk::Format::eR16G16B16Unorm;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
return vk::Format::eR16G16B16A16Unorm;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return vk::Format::eA2B10G10R10UnormPack32;
default:
break;
}

View File

@@ -107,8 +107,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
features.occlusionQueryPrecise = true;
features.fragmentStoresAndAtomics = true;
features.shaderImageGatherExtended = true;
features.shaderStorageImageReadWithoutFormat =
is_shader_storage_img_read_without_format_supported;
features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported;
features.shaderStorageImageWriteWithoutFormat = true;
features.textureCompressionASTC_LDR = is_optimal_astc_supported;
@@ -148,6 +147,15 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
}
vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback;
if (ext_transform_feedback) {
transform_feedback.transformFeedback = true;
transform_feedback.geometryStreams = true;
SetNext(next, transform_feedback);
} else {
LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks");
}
if (!ext_depth_range_unrestricted) {
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
}
@@ -385,7 +393,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
}
};
extensions.reserve(14);
extensions.reserve(15);
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
@@ -397,18 +405,22 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
[[maybe_unused]] const bool nsight =
std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
bool khr_shader_float16_int8{};
bool ext_subgroup_size_control{};
bool has_khr_shader_float16_int8{};
bool has_ext_subgroup_size_control{};
bool has_ext_transform_feedback{};
for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
Test(extension, khr_uniform_buffer_standard_layout,
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
false);
Test(extension, ext_depth_range_unrestricted,
VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
Test(extension, ext_shader_viewport_index_layer,
VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
Test(extension, has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
false);
Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME,
false);
if (Settings::values.renderer_debug) {
Test(extension, nv_device_diagnostic_checkpoints,
@@ -416,13 +428,13 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
}
}
if (khr_shader_float16_int8) {
if (has_khr_shader_float16_int8) {
is_float16_supported =
GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16;
extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
}
if (ext_subgroup_size_control) {
if (has_ext_subgroup_size_control) {
const auto features =
GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi);
const auto properties =
@@ -439,6 +451,20 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
is_warp_potentially_bigger = true;
}
if (has_ext_transform_feedback) {
const auto features =
GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi);
const auto properties =
GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi);
if (features.transformFeedback && features.geometryStreams &&
properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers &&
properties.transformFeedbackQueries && properties.transformFeedbackDraw) {
extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
ext_transform_feedback = true;
}
}
return extensions;
}
@@ -467,8 +493,7 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
const auto supported_features{physical.getFeatures(dldi)};
is_shader_storage_img_read_without_format_supported =
supported_features.shaderStorageImageReadWithoutFormat;
is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
}
@@ -510,6 +535,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti
vk::Format::eR32G32Sfloat,
vk::Format::eR32G32Uint,
vk::Format::eR16G16B16A16Uint,
vk::Format::eR16G16B16A16Snorm,
vk::Format::eR16G16B16A16Unorm,
vk::Format::eR16G16Unorm,
vk::Format::eR16G16Snorm,

View File

@@ -122,11 +122,6 @@ public:
return properties.limits.maxPushConstantsSize;
}
/// Returns true if Shader storage Image Read Without Format supported.
bool IsShaderStorageImageReadWithoutFormatSupported() const {
return is_shader_storage_img_read_without_format_supported;
}
/// Returns true if ASTC is natively supported.
bool IsOptimalAstcSupported() const {
return is_optimal_astc_supported;
@@ -147,6 +142,11 @@ public:
return (guest_warp_stages & stage) != vk::ShaderStageFlags{};
}
/// Returns true if formatless image load is supported.
bool IsFormatlessImageLoadSupported() const {
return is_formatless_image_load_supported;
}
/// Returns true if the device supports VK_EXT_scalar_block_layout.
bool IsKhrUniformBufferStandardLayoutSupported() const {
return khr_uniform_buffer_standard_layout;
@@ -167,6 +167,11 @@ public:
return ext_shader_viewport_index_layer;
}
/// Returns true if the device supports VK_EXT_transform_feedback.
bool IsExtTransformFeedbackSupported() const {
return ext_transform_feedback;
}
/// Returns true if the device supports VK_NV_device_diagnostic_checkpoints.
bool IsNvDeviceDiagnosticCheckpoints() const {
return nv_device_diagnostic_checkpoints;
@@ -214,26 +219,26 @@ private:
static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
const vk::PhysicalDevice physical; ///< Physical device.
vk::DispatchLoaderDynamic dld; ///< Device function pointers.
vk::PhysicalDeviceProperties properties; ///< Device properties.
UniqueDevice logical; ///< Logical device.
vk::Queue graphics_queue; ///< Main graphics queue.
vk::Queue present_queue; ///< Main present queue.
u32 graphics_family{}; ///< Main graphics queue family index.
u32 present_family{}; ///< Main present queue family index.
vk::DriverIdKHR driver_id{}; ///< Driver ID.
vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
bool is_float16_supported{}; ///< Support for float16 arithmetics.
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
const vk::PhysicalDevice physical; ///< Physical device.
vk::DispatchLoaderDynamic dld; ///< Device function pointers.
vk::PhysicalDeviceProperties properties; ///< Device properties.
UniqueDevice logical; ///< Logical device.
vk::Queue graphics_queue; ///< Main graphics queue.
vk::Queue present_queue; ///< Main present queue.
u32 graphics_family{}; ///< Main graphics queue family index.
u32 present_family{}; ///< Main present queue family index.
vk::DriverIdKHR driver_id{}; ///< Driver ID.
vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
bool is_float16_supported{}; ///< Support for float16 arithmetics.
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints.
bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage
///< image read without format
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.

View File

@@ -161,8 +161,8 @@ CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stag
GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
ProgramCode program_code, u32 main_offset)
: RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)},
shader_ir{this->program_code, main_offset, compiler_settings, locker},
program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)},
shader_ir{this->program_code, main_offset, compiler_settings, registry},
entries{GenerateShaderEntries(shader_ir)} {}
CachedShader::~CachedShader() = default;
@@ -273,9 +273,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
specialization.workgroup_size = key.workgroup_size;
specialization.shared_memory_size = key.shared_memory_size;
const SPIRVShader spirv_shader{
Decompile(device, shader->GetIR(), ShaderType::Compute, specialization),
shader->GetEntries()};
const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
shader->GetRegistry(), specialization),
shader->GetEntries()};
entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
update_descriptor_queue, spirv_shader);
return *entry;
@@ -324,8 +324,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
const auto& gpu = system.GPU().Maxwell3D();
Specialization specialization;
specialization.primitive_topology = fixed_state.input_assembly.topology;
if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) {
if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) {
ASSERT(fixed_state.input_assembly.point_size != 0.0f);
specialization.point_size = fixed_state.input_assembly.point_size;
}
@@ -333,9 +332,6 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
}
specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
specialization.tessellation.primitive = fixed_state.tessellation.primitive;
specialization.tessellation.spacing = fixed_state.tessellation.spacing;
specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
SPIRVProgram program;
std::vector<vk::DescriptorSetLayoutBinding> bindings;
@@ -356,8 +352,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
const auto program_type = GetShaderType(program_enum);
const auto& entries = shader->GetEntries();
program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization),
entries};
program[stage] = {
Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
entries};
if (program_enum == Maxwell::ShaderProgram::VertexA) {
// VertexB was combined with VertexA, so we skip the VertexB iteration

View File

@@ -25,7 +25,7 @@
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/shader/const_buffer_locker.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/surface.h"
@@ -132,6 +132,10 @@ public:
return shader_ir;
}
const VideoCommon::Shader::Registry& GetRegistry() const {
return registry;
}
const VideoCommon::Shader::ShaderIR& GetIR() const {
return shader_ir;
}
@@ -147,7 +151,7 @@ private:
GPUVAddr gpu_addr{};
VAddr cpu_addr{};
ProgramCode program_code;
VideoCommon::Shader::ConstBufferLocker locker;
VideoCommon::Shader::Registry registry;
VideoCommon::Shader::ShaderIR shader_ir;
ShaderEntries entries;
};

View File

@@ -347,6 +347,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
[&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); });
}
BeginTransformFeedback();
const auto pipeline_layout = pipeline.GetLayout();
const auto descriptor_set = pipeline.CommitDescriptorSet();
scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) {
@@ -356,6 +358,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
}
draw_params.Draw(cmdbuf, dld);
});
EndTransformFeedback();
}
void RasterizerVulkan::Clear() {
@@ -738,6 +742,44 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateStencilFaces(regs);
}
void RasterizerVulkan::BeginTransformFeedback() {
const auto& regs = system.GPU().Maxwell3D().regs;
if (regs.tfb_enabled == 0) {
return;
}
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable);
UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable);
UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable);
const auto& binding = regs.tfb_bindings[0];
UNIMPLEMENTED_IF(binding.buffer_enable == 0);
UNIMPLEMENTED_IF(binding.buffer_offset != 0);
const GPUVAddr gpu_addr = binding.Address();
const std::size_t size = binding.buffer_size;
const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) {
cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld);
cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld);
});
}
void RasterizerVulkan::EndTransformFeedback() {
const auto& regs = system.GPU().Maxwell3D().regs;
if (regs.tfb_enabled == 0) {
return;
}
scheduler.Record(
[](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); });
}
void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
BufferBindings& buffer_bindings) {
const auto& regs = system.GPU().Maxwell3D().regs;
@@ -854,7 +896,7 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::
void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Images);
const auto& gpu = system.GPU().KeplerCompute();
const auto& gpu = system.GPU().Maxwell3D();
for (const auto& entry : entries.images) {
const auto tic = GetTextureInfo(gpu, entry, stage).tic;
SetupImage(tic, entry);
@@ -915,6 +957,13 @@ void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
const Tegra::Engines::ConstBufferInfo& buffer) {
if (!buffer.enabled) {
// Set values to zero to unbind buffers
update_descriptor_queue.AddBuffer(buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
sizeof(float));
return;
}
// Align the size to avoid bad std140 interactions
const std::size_t size =
Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
@@ -1102,7 +1151,7 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
// This implementation assumes that all attributes are used in the shader.
const GPUVAddr start{regs.vertex_array[index].StartAddress()};
const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
DEBUG_ASSERT(end > start);
DEBUG_ASSERT(end >= start);
size += (end - start + 1) * regs.vertex_array[index].enable;
}

View File

@@ -169,6 +169,10 @@ private:
void UpdateDynamicStates();
void BeginTransformFeedback();
void EndTransformFeedback();
bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,

View File

@@ -5,7 +5,9 @@
#include <functional>
#include <limits>
#include <map>
#include <optional>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <fmt/format.h>
@@ -24,6 +26,7 @@
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/shader/node.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/shader/transform_feedback.h"
namespace Vulkan {
@@ -93,6 +96,12 @@ struct VertexIndices {
std::optional<u32> clip_distances;
};
struct GenericVaryingDescription {
Id id = nullptr;
u32 first_element = 0;
bool is_scalar = false;
};
spv::Dim GetSamplerDim(const Sampler& sampler) {
ASSERT(!sampler.IsBuffer());
switch (sampler.GetType()) {
@@ -266,9 +275,13 @@ bool IsPrecise(Operation operand) {
class SPIRVDecompiler final : public Sirit::Module {
public:
explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage,
const Specialization& specialization)
const Registry& registry, const Specialization& specialization)
: Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()},
specialization{specialization} {
registry{registry}, specialization{specialization} {
if (stage != ShaderType::Compute) {
transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
}
AddCapability(spv::Capability::Shader);
AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
AddCapability(spv::Capability::ImageQuery);
@@ -286,6 +299,15 @@ public:
AddExtension("SPV_KHR_variable_pointers");
AddExtension("SPV_KHR_shader_draw_parameters");
if (!transform_feedback.empty()) {
if (device.IsExtTransformFeedbackSupported()) {
AddCapability(spv::Capability::TransformFeedback);
} else {
LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not "
"supported on this device");
}
}
if (ir.UsesLayer() || ir.UsesViewportIndex()) {
if (ir.UsesViewportIndex()) {
AddCapability(spv::Capability::MultiViewport);
@@ -296,7 +318,7 @@ public:
}
}
if (device.IsShaderStorageImageReadWithoutFormatSupported()) {
if (device.IsFormatlessImageLoadSupported()) {
AddCapability(spv::Capability::StorageImageReadWithoutFormat);
}
@@ -318,25 +340,29 @@ public:
AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
header.common2.threads_per_input_primitive);
break;
case ShaderType::TesselationEval:
case ShaderType::TesselationEval: {
const auto& info = registry.GetGraphicsInfo();
AddCapability(spv::Capability::Tessellation);
AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces);
AddExecutionMode(main, GetExecutionMode(specialization.tessellation.primitive));
AddExecutionMode(main, GetExecutionMode(specialization.tessellation.spacing));
AddExecutionMode(main, specialization.tessellation.clockwise
AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive));
AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing));
AddExecutionMode(main, info.tessellation_clockwise
? spv::ExecutionMode::VertexOrderCw
: spv::ExecutionMode::VertexOrderCcw);
break;
case ShaderType::Geometry:
}
case ShaderType::Geometry: {
const auto& info = registry.GetGraphicsInfo();
AddCapability(spv::Capability::Geometry);
AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces);
AddExecutionMode(main, GetExecutionMode(specialization.primitive_topology));
AddExecutionMode(main, GetExecutionMode(info.primitive_topology));
AddExecutionMode(main, GetExecutionMode(header.common3.output_topology));
AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
header.common4.max_output_vertices);
// TODO(Rodrigo): Where can we get this info from?
AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U);
break;
}
case ShaderType::Fragment:
AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces);
AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
@@ -545,7 +571,8 @@ private:
if (stage != ShaderType::Geometry) {
return;
}
const u32 num_input = GetNumPrimitiveTopologyVertices(specialization.primitive_topology);
const auto& info = registry.GetGraphicsInfo();
const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology);
DeclareInputVertexArray(num_input);
DeclareOutputVertex();
}
@@ -742,12 +769,34 @@ private:
}
void DeclareOutputAttributes() {
if (stage == ShaderType::Compute || stage == ShaderType::Fragment) {
return;
}
UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex);
for (const auto index : ir.GetOutputAttributes()) {
if (!IsGenericAttribute(index)) {
continue;
}
const u32 location = GetGenericAttributeLocation(index);
Id type = t_float4;
DeclareOutputAttribute(index);
}
}
void DeclareOutputAttribute(Attribute::Index index) {
static constexpr std::string_view swizzle = "xyzw";
const u32 location = GetGenericAttributeLocation(index);
u8 element = 0;
while (element < 4) {
const std::size_t remainder = 4 - element;
std::size_t num_components = remainder;
const std::optional tfb = GetTransformFeedbackInfo(index, element);
if (tfb) {
num_components = tfb->components;
}
Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1);
Id varying_default = v_varying_default;
if (IsOutputAttributeArray()) {
const u32 num = GetNumOutputVertices();
@@ -760,15 +809,47 @@ private:
}
type = TypePointer(spv::StorageClass::Output, type);
std::string name = fmt::format("out_attr{}", location);
if (num_components < 4 || element > 0) {
name = fmt::format("{}_{}", name, swizzle.substr(element, num_components));
}
const Id id = OpVariable(type, spv::StorageClass::Output, varying_default);
Name(AddGlobalVariable(id), fmt::format("out_attr{}", location));
output_attributes.emplace(index, id);
Name(AddGlobalVariable(id), name);
GenericVaryingDescription description;
description.id = id;
description.first_element = element;
description.is_scalar = num_components == 1;
for (u32 i = 0; i < num_components; ++i) {
const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i);
output_attributes.emplace(offset, description);
}
interfaces.push_back(id);
Decorate(id, spv::Decoration::Location, location);
if (element > 0) {
Decorate(id, spv::Decoration::Component, static_cast<u32>(element));
}
if (tfb && device.IsExtTransformFeedbackSupported()) {
Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer));
Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride));
Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset));
}
element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
}
}
std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) {
const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
const auto it = transform_feedback.find(location);
if (it == transform_feedback.end()) {
return {};
}
return it->second;
}
u32 DeclareConstantBuffers(u32 binding) {
for (const auto& [index, size] : ir.GetConstantBuffers()) {
const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo
@@ -898,7 +979,7 @@ private:
u32 GetNumInputVertices() const {
switch (stage) {
case ShaderType::Geometry:
return GetNumPrimitiveTopologyVertices(specialization.primitive_topology);
return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology);
case ShaderType::TesselationControl:
case ShaderType::TesselationEval:
return NumInputPatches;
@@ -1346,8 +1427,14 @@ private:
}
default:
if (IsGenericAttribute(attribute)) {
const Id composite = output_attributes.at(attribute);
return {ArrayPass(t_out_float, composite, {element}), Type::Float};
const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element);
const GenericVaryingDescription description = output_attributes.at(offset);
const Id composite = description.id;
std::vector<u32> indices;
if (!description.is_scalar) {
indices.push_back(element - description.first_element);
}
return {ArrayPass(t_out_float, composite, indices), Type::Float};
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
static_cast<u32>(attribute));
@@ -1793,7 +1880,7 @@ private:
}
Expression ImageLoad(Operation operation) {
if (!device.IsShaderStorageImageReadWithoutFormatSupported()) {
if (!device.IsFormatlessImageLoadSupported()) {
return {v_float_zero, Type::Float};
}
@@ -2258,11 +2345,11 @@ private:
std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const {
switch (type) {
case Type::Float:
return {nullptr, t_float2, t_float3, t_float4};
return {t_float, t_float2, t_float3, t_float4};
case Type::Int:
return {nullptr, t_int2, t_int3, t_int4};
return {t_int, t_int2, t_int3, t_int4};
case Type::Uint:
return {nullptr, t_uint2, t_uint3, t_uint4};
return {t_uint, t_uint2, t_uint3, t_uint4};
default:
UNIMPLEMENTED();
return {};
@@ -2495,7 +2582,9 @@ private:
const ShaderIR& ir;
const ShaderType stage;
const Tegra::Shader::Header header;
const Registry& registry;
const Specialization& specialization;
std::unordered_map<u8, VaryingTFB> transform_feedback;
const Id t_void = Name(TypeVoid(), "void");
@@ -2584,7 +2673,7 @@ private:
Id shared_memory{};
std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
std::map<Attribute::Index, Id> input_attributes;
std::map<Attribute::Index, Id> output_attributes;
std::unordered_map<u8, GenericVaryingDescription> output_attributes;
std::map<u32, Id> constant_buffers;
std::map<GlobalMemoryBase, Id> global_buffers;
std::map<u32, TexelBuffer> texel_buffers;
@@ -2870,8 +2959,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
}
std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
ShaderType stage, const Specialization& specialization) {
return SPIRVDecompiler(device, ir, stage, specialization).Assemble();
ShaderType stage, const VideoCommon::Shader::Registry& registry,
const Specialization& specialization) {
return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble();
}
} // namespace Vulkan

View File

@@ -15,6 +15,7 @@
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace Vulkan {
@@ -91,17 +92,9 @@ struct Specialization final {
u32 shared_memory_size{};
// Graphics specific
Maxwell::PrimitiveTopology primitive_topology{};
std::optional<float> point_size{};
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
bool ndc_minus_one_to_one{};
// Tessellation specific
struct {
Maxwell::TessellationPrimitive primitive{};
Maxwell::TessellationSpacing spacing{};
bool clockwise{};
} tessellation;
};
// Old gcc versions don't consider this trivially copyable.
// static_assert(std::is_trivially_copyable_v<Specialization>);
@@ -114,6 +107,8 @@ struct SPIRVShader {
ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
Tegra::Engines::ShaderType stage, const Specialization& specialization);
Tegra::Engines::ShaderType stage,
const VideoCommon::Shader::Registry& registry,
const Specialization& specialization);
} // namespace Vulkan

View File

@@ -73,7 +73,8 @@ VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_
VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) {
const auto usage =
vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eIndexBuffer;
vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer |
vk::BufferUsageFlagBits::eIndexBuffer;
const u32 log2 = Common::Log2Ceil64(size);
const vk::BufferCreateInfo buffer_ci({}, 1ULL << log2, usage, vk::SharingMode::eExclusive, 0,
nullptr);

View File

@@ -1,126 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <tuple>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/shader/const_buffer_locker.h"
namespace VideoCommon::Shader {
using Tegra::Engines::SamplerDescriptor;
ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage)
: stage{shader_stage} {}
ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
Tegra::Engines::ConstBufferEngineInterface& engine)
: stage{shader_stage}, engine{&engine} {}
ConstBufferLocker::~ConstBufferLocker() = default;
std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) {
const std::pair<u32, u32> key = {buffer, offset};
const auto iter = keys.find(key);
if (iter != keys.end()) {
return iter->second;
}
if (!engine) {
return std::nullopt;
}
const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
keys.emplace(key, value);
return value;
}
std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) {
const u32 key = offset;
const auto iter = bound_samplers.find(key);
if (iter != bound_samplers.end()) {
return iter->second;
}
if (!engine) {
return std::nullopt;
}
const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
bound_samplers.emplace(key, value);
return value;
}
std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler(
u32 buffer, u32 offset) {
const std::pair key = {buffer, offset};
const auto iter = bindless_samplers.find(key);
if (iter != bindless_samplers.end()) {
return iter->second;
}
if (!engine) {
return std::nullopt;
}
const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
bindless_samplers.emplace(key, value);
return value;
}
std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() {
if (bound_buffer_saved) {
return bound_buffer;
}
if (!engine) {
return std::nullopt;
}
bound_buffer_saved = true;
bound_buffer = engine->GetBoundBuffer();
return bound_buffer;
}
void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
keys.insert_or_assign({buffer, offset}, value);
}
void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
bound_samplers.insert_or_assign(offset, sampler);
}
void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
bindless_samplers.insert_or_assign({buffer, offset}, sampler);
}
void ConstBufferLocker::SetBoundBuffer(u32 buffer) {
bound_buffer_saved = true;
bound_buffer = buffer;
}
bool ConstBufferLocker::IsConsistent() const {
if (!engine) {
return false;
}
return std::all_of(keys.begin(), keys.end(),
[this](const auto& pair) {
const auto [cbuf, offset] = pair.first;
const auto value = pair.second;
return value == engine->AccessConstBuffer32(stage, cbuf, offset);
}) &&
std::all_of(bound_samplers.begin(), bound_samplers.end(),
[this](const auto& sampler) {
const auto [key, value] = sampler;
return value == engine->AccessBoundSampler(stage, key);
}) &&
std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
[this](const auto& sampler) {
const auto [cbuf, offset] = sampler.first;
const auto value = sampler.second;
return value == engine->AccessBindlessSampler(stage, cbuf, offset);
});
}
bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const {
return std::tie(keys, bound_samplers, bindless_samplers) ==
std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
}
} // namespace VideoCommon::Shader

View File

@@ -1,103 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <optional>
#include <unordered_map>
#include "common/common_types.h"
#include "common/hash.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/shader_type.h"
#include "video_core/guest_driver.h"
namespace VideoCommon::Shader {
using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
using BindlessSamplerMap =
std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
/**
* The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader
* compiler. with it, the shader can obtain required data from GPU state and store it for disk
* shader compilation.
*/
class ConstBufferLocker {
public:
explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage);
explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
Tegra::Engines::ConstBufferEngineInterface& engine);
~ConstBufferLocker();
/// Retrieves a key from the locker, if it's registered, it will give the registered value, if
/// not it will obtain it from maxwell3d and register it.
std::optional<u32> ObtainKey(u32 buffer, u32 offset);
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
std::optional<u32> ObtainBoundBuffer();
/// Inserts a key.
void InsertKey(u32 buffer, u32 offset, u32 value);
/// Inserts a bound sampler key.
void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
/// Inserts a bindless sampler key.
void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
/// Set the bound buffer for this locker.
void SetBoundBuffer(u32 buffer);
/// Checks keys and samplers against engine's current const buffers. Returns true if they are
/// the same value, false otherwise;
bool IsConsistent() const;
/// Returns true if the keys are equal to the other ones in the locker.
bool HasEqualKeys(const ConstBufferLocker& rhs) const;
/// Gives an getter to the const buffer keys in the database.
const KeyMap& GetKeys() const {
return keys;
}
/// Gets samplers database.
const BoundSamplerMap& GetBoundSamplers() const {
return bound_samplers;
}
/// Gets bindless samplers database.
const BindlessSamplerMap& GetBindlessSamplers() const {
return bindless_samplers;
}
/// Gets bound buffer used on this shader
u32 GetBoundBuffer() const {
return bound_buffer;
}
/// Obtains access to the guest driver's profile.
VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const {
if (engine) {
return &engine->AccessGuestDriverProfile();
}
return nullptr;
}
private:
const Tegra::Engines::ShaderType stage;
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
KeyMap keys;
BoundSamplerMap bound_samplers;
BindlessSamplerMap bindless_samplers;
bool bound_buffer_saved{};
u32 bound_buffer{};
};
} // namespace VideoCommon::Shader

View File

@@ -13,6 +13,7 @@
#include "common/common_types.h"
#include "video_core/shader/ast.h"
#include "video_core/shader/control_flow.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -64,11 +65,11 @@ struct BlockInfo {
};
struct CFGRebuildState {
explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker)
: program_code{program_code}, locker{locker}, start{start} {}
explicit CFGRebuildState(const ProgramCode& program_code, u32 start, Registry& registry)
: program_code{program_code}, registry{registry}, start{start} {}
const ProgramCode& program_code;
ConstBufferLocker& locker;
Registry& registry;
u32 start{};
std::vector<BlockInfo> block_info;
std::list<u32> inspect_queries;
@@ -438,7 +439,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
const s32 pc_target = offset + result.relative_position;
std::vector<CaseBranch> branches;
for (u32 i = 0; i < result.entries; i++) {
auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4);
if (!key) {
return {ParseResult::AbnormalFlow, parse_info};
}
@@ -656,14 +657,14 @@ void DecompileShader(CFGRebuildState& state) {
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
const CompilerSettings& settings,
ConstBufferLocker& locker) {
Registry& registry) {
auto result_out = std::make_unique<ShaderCharacteristics>();
if (settings.depth == CompileDepth::BruteForce) {
result_out->settings.depth = CompileDepth::BruteForce;
return result_out;
}
CFGRebuildState state{program_code, start_address, locker};
CFGRebuildState state{program_code, start_address, registry};
// Inspect Code and generate blocks
state.labels.clear();
state.labels.emplace(start_address);

View File

@@ -12,6 +12,7 @@
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/ast.h"
#include "video_core/shader/compiler_settings.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -111,6 +112,6 @@ struct ShaderCharacteristics {
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
const CompilerSettings& settings,
ConstBufferLocker& locker);
Registry& registry);
} // namespace VideoCommon::Shader

View File

@@ -34,13 +34,9 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
return (absolute_offset % SchedPeriod) == 0;
}
void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
const std::list<Sampler>& used_samplers) {
if (gpu_driver == nullptr) {
LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet");
return;
}
if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) {
if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
return;
}
u32 count{};
@@ -53,17 +49,13 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
bound_offsets.emplace_back(sampler.GetOffset());
}
if (count > 1) {
gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets));
gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets));
}
}
std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
VideoCore::GuestDriverProfile* gpu_driver,
VideoCore::GuestDriverProfile& gpu_driver,
const std::list<Sampler>& used_samplers) {
if (gpu_driver == nullptr) {
LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet");
return std::nullopt;
}
const u32 base_offset = sampler_to_deduce.GetOffset();
u32 max_offset{std::numeric_limits<u32>::max()};
for (const auto& sampler : used_samplers) {
@@ -77,7 +69,7 @@ std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
if (max_offset == std::numeric_limits<u32>::max()) {
return std::nullopt;
}
return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize();
return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize();
}
} // Anonymous namespace
@@ -149,7 +141,7 @@ void ShaderIR::Decode() {
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
decompiled = false;
auto info = ScanFlow(program_code, main_offset, settings, locker);
auto info = ScanFlow(program_code, main_offset, settings, registry);
auto& shader_info = *info;
coverage_begin = shader_info.start;
coverage_end = shader_info.end;
@@ -364,7 +356,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
void ShaderIR::PostDecode() {
// Deduce texture handler size if needed
auto gpu_driver = locker.AccessGuestDriverProfile();
auto gpu_driver = registry.AccessGuestDriverProfile();
DeduceTextureHandlerSize(gpu_driver, used_samplers);
// Deduce Indexed Samplers
if (!uses_indexed_samplers) {

View File

@@ -17,33 +17,60 @@ u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
UNIMPLEMENTED_IF(instr.bfe.negate_b);
Node op_a = GetRegister(instr.gpr8);
op_a = GetOperandAbsNegInteger(op_a, false, instr.bfe.negate_a, false);
Node op_b = [&] {
switch (opcode->get().GetId()) {
case OpCode::Id::BFE_R:
return GetRegister(instr.gpr20);
case OpCode::Id::BFE_C:
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
case OpCode::Id::BFE_IMM:
return Immediate(instr.alu.GetSignedImm20_20());
default:
UNREACHABLE();
return Immediate(0);
}
}();
switch (opcode->get().GetId()) {
case OpCode::Id::BFE_IMM: {
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in BFE is not implemented");
UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented");
const Node inner_shift_imm = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue()));
const Node outer_shift_imm =
Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position));
const bool is_signed = instr.bfe.is_signed;
const Node inner_shift =
Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, inner_shift_imm);
const Node outer_shift =
Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm);
SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc);
SetRegister(bb, instr.gpr0, outer_shift);
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName());
// using reverse parallel method in
// https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
// note for later if possible to implement faster method.
if (instr.bfe.brev) {
const auto swap = [&](u32 s, u32 mask) {
Node v1 =
SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s));
if (mask != 0) {
v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1),
Immediate(mask));
}
Node v2 = op_a;
if (mask != 0) {
v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2),
Immediate(mask));
}
v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2),
Immediate(s));
return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1),
std::move(v2));
};
op_a = swap(1, 0x55555555U);
op_a = swap(2, 0x33333333U);
op_a = swap(4, 0x0F0F0F0FU);
op_a = swap(8, 0x00FF00FFU);
op_a = swap(16, 0);
}
const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
Immediate(0), Immediate(8));
const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
Immediate(8), Immediate(8));
auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits);
SetRegister(bb, instr.gpr0, std::move(result));
return pc;
}

View File

@@ -12,6 +12,7 @@
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/node_helper.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -359,8 +360,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sample
if (sampler_info) {
return *sampler_info;
}
const auto sampler =
buffer ? locker.ObtainBindlessSampler(*buffer, offset) : locker.ObtainBoundSampler(offset);
const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
: registry.ObtainBoundSampler(offset);
if (!sampler) {
LOG_WARNING(HW_GPU, "Unknown sampler info");
return SamplerInfo{TextureType::Texture2D, false, false, false};

View File

@@ -68,6 +68,8 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed)
return OperationCode::UBitwiseXor;
case OperationCode::IBitwiseNot:
return OperationCode::UBitwiseNot;
case OperationCode::IBitfieldExtract:
return OperationCode::UBitfieldExtract;
case OperationCode::IBitfieldInsert:
return OperationCode::UBitfieldInsert;
case OperationCode::IBitCount:

View File

@@ -0,0 +1,161 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <tuple>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/shader/registry.h"
namespace VideoCommon::Shader {
using Tegra::Engines::ConstBufferEngineInterface;
using Tegra::Engines::SamplerDescriptor;
using Tegra::Engines::ShaderType;
namespace {
GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
if (shader_stage == ShaderType::Compute) {
return {};
}
auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine);
GraphicsInfo info;
info.tfb_layouts = graphics.regs.tfb_layouts;
info.tfb_varying_locs = graphics.regs.tfb_varying_locs;
info.primitive_topology = graphics.regs.draw.topology;
info.tessellation_primitive = graphics.regs.tess_mode.prim;
info.tessellation_spacing = graphics.regs.tess_mode.spacing;
info.tfb_enabled = graphics.regs.tfb_enabled;
info.tessellation_clockwise = graphics.regs.tess_mode.cw;
return info;
}
ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
if (shader_stage != ShaderType::Compute) {
return {};
}
auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine);
const auto& launch = compute.launch_description;
ComputeInfo info;
info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z};
info.local_memory_size_in_words = launch.local_pos_alloc;
info.shared_memory_size_in_words = launch.shared_alloc;
return info;
}
} // Anonymous namespace
Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info)
: stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
Registry::Registry(Tegra::Engines::ShaderType shader_stage,
Tegra::Engines::ConstBufferEngineInterface& engine)
: stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()},
graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo(
shader_stage, engine)} {}
Registry::~Registry() = default;
std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) {
const std::pair<u32, u32> key = {buffer, offset};
const auto iter = keys.find(key);
if (iter != keys.end()) {
return iter->second;
}
if (!engine) {
return std::nullopt;
}
const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
keys.emplace(key, value);
return value;
}
std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
const u32 key = offset;
const auto iter = bound_samplers.find(key);
if (iter != bound_samplers.end()) {
return iter->second;
}
if (!engine) {
return std::nullopt;
}
const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
bound_samplers.emplace(key, value);
return value;
}
std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
u32 offset) {
const std::pair key = {buffer, offset};
const auto iter = bindless_samplers.find(key);
if (iter != bindless_samplers.end()) {
return iter->second;
}
if (!engine) {
return std::nullopt;
}
const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
bindless_samplers.emplace(key, value);
return value;
}
void Registry::InsertKey(u32 buffer, u32 offset, u32 value) {
keys.insert_or_assign({buffer, offset}, value);
}
void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
bound_samplers.insert_or_assign(offset, sampler);
}
void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
bindless_samplers.insert_or_assign({buffer, offset}, sampler);
}
bool Registry::IsConsistent() const {
if (!engine) {
return true;
}
return std::all_of(keys.begin(), keys.end(),
[this](const auto& pair) {
const auto [cbuf, offset] = pair.first;
const auto value = pair.second;
return value == engine->AccessConstBuffer32(stage, cbuf, offset);
}) &&
std::all_of(bound_samplers.begin(), bound_samplers.end(),
[this](const auto& sampler) {
const auto [key, value] = sampler;
return value == engine->AccessBoundSampler(stage, key);
}) &&
std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
[this](const auto& sampler) {
const auto [cbuf, offset] = sampler.first;
const auto value = sampler.second;
return value == engine->AccessBindlessSampler(stage, cbuf, offset);
});
}
bool Registry::HasEqualKeys(const Registry& rhs) const {
return std::tie(keys, bound_samplers, bindless_samplers) ==
std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
}
const GraphicsInfo& Registry::GetGraphicsInfo() const {
ASSERT(stage != Tegra::Engines::ShaderType::Compute);
return graphics_info;
}
const ComputeInfo& Registry::GetComputeInfo() const {
ASSERT(stage == Tegra::Engines::ShaderType::Compute);
return compute_info;
}
} // namespace VideoCommon::Shader

View File

@@ -0,0 +1,137 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <optional>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include "common/common_types.h"
#include "common/hash.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/guest_driver.h"
namespace VideoCommon::Shader {
using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
using BindlessSamplerMap =
std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
struct GraphicsInfo {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers>
tfb_layouts{};
std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{};
Maxwell::PrimitiveTopology primitive_topology{};
Maxwell::TessellationPrimitive tessellation_primitive{};
Maxwell::TessellationSpacing tessellation_spacing{};
bool tfb_enabled = false;
bool tessellation_clockwise = false;
};
static_assert(std::is_trivially_copyable_v<GraphicsInfo> &&
std::is_standard_layout_v<GraphicsInfo>);
struct ComputeInfo {
std::array<u32, 3> workgroup_size{};
u32 shared_memory_size_in_words = 0;
u32 local_memory_size_in_words = 0;
};
static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>);
struct SerializedRegistryInfo {
VideoCore::GuestDriverProfile guest_driver_profile;
u32 bound_buffer = 0;
GraphicsInfo graphics;
ComputeInfo compute;
};
/**
* The Registry is a class use to interface the 3D and compute engines with the shader compiler.
* With it, the shader can obtain required data from GPU state and store it for disk shader
* compilation.
*/
class Registry {
public:
explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
explicit Registry(Tegra::Engines::ShaderType shader_stage,
Tegra::Engines::ConstBufferEngineInterface& engine);
~Registry();
/// Retrieves a key from the registry, if it's registered, it will give the registered value, if
/// not it will obtain it from maxwell3d and register it.
std::optional<u32> ObtainKey(u32 buffer, u32 offset);
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
/// Inserts a key.
void InsertKey(u32 buffer, u32 offset, u32 value);
/// Inserts a bound sampler key.
void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
/// Inserts a bindless sampler key.
void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
/// Checks keys and samplers against engine's current const buffers.
/// Returns true if they are the same value, false otherwise.
bool IsConsistent() const;
/// Returns true if the keys are equal to the other ones in the registry.
bool HasEqualKeys(const Registry& rhs) const;
/// Returns graphics information from this shader
const GraphicsInfo& GetGraphicsInfo() const;
/// Returns compute information from this shader
const ComputeInfo& GetComputeInfo() const;
/// Gives an getter to the const buffer keys in the database.
const KeyMap& GetKeys() const {
return keys;
}
/// Gets samplers database.
const BoundSamplerMap& GetBoundSamplers() const {
return bound_samplers;
}
/// Gets bindless samplers database.
const BindlessSamplerMap& GetBindlessSamplers() const {
return bindless_samplers;
}
/// Gets bound buffer used on this shader
u32 GetBoundBuffer() const {
return bound_buffer;
}
/// Obtains access to the guest driver's profile.
VideoCore::GuestDriverProfile& AccessGuestDriverProfile() {
return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile;
}
private:
const Tegra::Engines::ShaderType stage;
VideoCore::GuestDriverProfile stored_guest_driver_profile;
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
KeyMap keys;
BoundSamplerMap bound_samplers;
BindlessSamplerMap bindless_samplers;
u32 bound_buffer;
GraphicsInfo graphics_info;
ComputeInfo compute_info;
};
} // namespace VideoCommon::Shader

View File

@@ -11,6 +11,7 @@
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/node_helper.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -24,8 +25,8 @@ using Tegra::Shader::PredOperation;
using Tegra::Shader::Register;
ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
ConstBufferLocker& locker)
: program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
Registry& registry)
: program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} {
Decode();
PostDecode();
}

View File

@@ -18,8 +18,8 @@
#include "video_core/engines/shader_header.h"
#include "video_core/shader/ast.h"
#include "video_core/shader/compiler_settings.h"
#include "video_core/shader/const_buffer_locker.h"
#include "video_core/shader/node.h"
#include "video_core/shader/registry.h"
namespace VideoCommon::Shader {
@@ -69,7 +69,7 @@ struct GlobalMemoryUsage {
class ShaderIR final {
public:
explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
ConstBufferLocker& locker);
Registry& registry);
~ShaderIR();
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -414,7 +414,7 @@ private:
const ProgramCode& program_code;
const u32 main_offset;
const CompilerSettings settings;
ConstBufferLocker& locker;
Registry& registry;
bool decompiled{};
bool disable_flow_stack{};

View File

@@ -81,26 +81,20 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
return {tracked, track};
} else if (const auto operation = std::get_if<OperationNode>(&*offset)) {
auto bound_buffer = locker.ObtainBoundBuffer();
if (!bound_buffer) {
const u32 bound_buffer = registry.GetBoundBuffer();
if (bound_buffer != cbuf->GetIndex()) {
return {};
}
if (*bound_buffer != cbuf->GetIndex()) {
return {};
}
auto pair = DecoupleIndirectRead(*operation);
const auto pair = DecoupleIndirectRead(*operation);
if (!pair) {
return {};
}
auto [gpr, base_offset] = *pair;
const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
auto gpu_driver = locker.AccessGuestDriverProfile();
if (gpu_driver == nullptr) {
return {};
}
const auto& gpu_driver = registry.AccessGuestDriverProfile();
const u32 bindless_cv = NewCustomVariable();
const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr,
Immediate(gpu_driver->GetTextureHandlerSize()));
const Node op =
Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
const Node cv_node = GetCustomVariable(bindless_cv);
Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));

View File

@@ -0,0 +1,115 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <unordered_map>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/transform_feedback.h"
namespace VideoCommon::Shader {
namespace {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20
/// Attribute offsets that describe a vector
constexpr std::array VECTORS = {
28, // gl_Position
32, // Generic 0
36, // Generic 1
40, // Generic 2
44, // Generic 3
48, // Generic 4
52, // Generic 5
56, // Generic 6
60, // Generic 7
64, // Generic 8
68, // Generic 9
72, // Generic 10
76, // Generic 11
80, // Generic 12
84, // Generic 13
88, // Generic 14
92, // Generic 15
96, // Generic 16
100, // Generic 17
104, // Generic 18
108, // Generic 19
112, // Generic 20
116, // Generic 21
120, // Generic 22
124, // Generic 23
128, // Generic 24
132, // Generic 25
136, // Generic 26
140, // Generic 27
144, // Generic 28
148, // Generic 29
152, // Generic 30
156, // Generic 31
160, // gl_FrontColor
164, // gl_FrontSecondaryColor
160, // gl_BackColor
164, // gl_BackSecondaryColor
192, // gl_TexCoord[0]
196, // gl_TexCoord[1]
200, // gl_TexCoord[2]
204, // gl_TexCoord[3]
208, // gl_TexCoord[4]
212, // gl_TexCoord[5]
216, // gl_TexCoord[6]
220, // gl_TexCoord[7]
};
} // namespace
std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) {
std::unordered_map<u8, VaryingTFB> tfb;
for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) {
const auto& locations = info.tfb_varying_locs[buffer];
const auto& layout = info.tfb_layouts[buffer];
const std::size_t varying_count = layout.varying_count;
std::size_t highest = 0;
for (std::size_t offset = 0; offset < varying_count; ++offset) {
const std::size_t base_offset = offset;
const u8 location = locations[offset];
VaryingTFB varying;
varying.buffer = layout.stream;
varying.stride = layout.stride;
varying.offset = offset * sizeof(u32);
varying.components = 1;
if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) {
UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
const u8 base_index = location / 4;
while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
++offset;
++varying.components;
}
}
[[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second;
UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored");
highest = std::max(highest, (base_offset + varying.components) * sizeof(u32));
}
UNIMPLEMENTED_IF(highest != layout.stride);
}
return tfb;
}
} // namespace VideoCommon::Shader

View File

@@ -0,0 +1,23 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <unordered_map>
#include "common/common_types.h"
#include "video_core/shader/registry.h"
namespace VideoCommon::Shader {
struct VaryingTFB {
std::size_t buffer;
std::size_t stride;
std::size_t offset;
std::size_t components;
};
std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info);
} // namespace VideoCommon::Shader

View File

@@ -111,6 +111,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
return PixelFormat::RGBA16F;
case Tegra::RenderTargetFormat::RGBA16_UNORM:
return PixelFormat::RGBA16U;
case Tegra::RenderTargetFormat::RGBA16_SNORM:
return PixelFormat::RGBA16S;
case Tegra::RenderTargetFormat::RGBA16_UINT:
return PixelFormat::RGBA16UI;
case Tegra::RenderTargetFormat::RGBA32_FLOAT:

View File

@@ -25,82 +25,83 @@ enum class PixelFormat {
R8UI = 7,
RGBA16F = 8,
RGBA16U = 9,
RGBA16UI = 10,
R11FG11FB10F = 11,
RGBA32UI = 12,
DXT1 = 13,
DXT23 = 14,
DXT45 = 15,
DXN1 = 16, // This is also known as BC4
DXN2UNORM = 17,
DXN2SNORM = 18,
BC7U = 19,
BC6H_UF16 = 20,
BC6H_SF16 = 21,
ASTC_2D_4X4 = 22,
BGRA8 = 23,
RGBA32F = 24,
RG32F = 25,
R32F = 26,
R16F = 27,
R16U = 28,
R16S = 29,
R16UI = 30,
R16I = 31,
RG16 = 32,
RG16F = 33,
RG16UI = 34,
RG16I = 35,
RG16S = 36,
RGB32F = 37,
RGBA8_SRGB = 38,
RG8U = 39,
RG8S = 40,
RG32UI = 41,
RGBX16F = 42,
R32UI = 43,
R32I = 44,
ASTC_2D_8X8 = 45,
ASTC_2D_8X5 = 46,
ASTC_2D_5X4 = 47,
BGRA8_SRGB = 48,
DXT1_SRGB = 49,
DXT23_SRGB = 50,
DXT45_SRGB = 51,
BC7U_SRGB = 52,
R4G4B4A4U = 53,
ASTC_2D_4X4_SRGB = 54,
ASTC_2D_8X8_SRGB = 55,
ASTC_2D_8X5_SRGB = 56,
ASTC_2D_5X4_SRGB = 57,
ASTC_2D_5X5 = 58,
ASTC_2D_5X5_SRGB = 59,
ASTC_2D_10X8 = 60,
ASTC_2D_10X8_SRGB = 61,
ASTC_2D_6X6 = 62,
ASTC_2D_6X6_SRGB = 63,
ASTC_2D_10X10 = 64,
ASTC_2D_10X10_SRGB = 65,
ASTC_2D_12X12 = 66,
ASTC_2D_12X12_SRGB = 67,
ASTC_2D_8X6 = 68,
ASTC_2D_8X6_SRGB = 69,
ASTC_2D_6X5 = 70,
ASTC_2D_6X5_SRGB = 71,
E5B9G9R9F = 72,
RGBA16S = 10,
RGBA16UI = 11,
R11FG11FB10F = 12,
RGBA32UI = 13,
DXT1 = 14,
DXT23 = 15,
DXT45 = 16,
DXN1 = 17, // This is also known as BC4
DXN2UNORM = 18,
DXN2SNORM = 19,
BC7U = 20,
BC6H_UF16 = 21,
BC6H_SF16 = 22,
ASTC_2D_4X4 = 23,
BGRA8 = 24,
RGBA32F = 25,
RG32F = 26,
R32F = 27,
R16F = 28,
R16U = 29,
R16S = 30,
R16UI = 31,
R16I = 32,
RG16 = 33,
RG16F = 34,
RG16UI = 35,
RG16I = 36,
RG16S = 37,
RGB32F = 38,
RGBA8_SRGB = 39,
RG8U = 40,
RG8S = 41,
RG32UI = 42,
RGBX16F = 43,
R32UI = 44,
R32I = 45,
ASTC_2D_8X8 = 46,
ASTC_2D_8X5 = 47,
ASTC_2D_5X4 = 48,
BGRA8_SRGB = 49,
DXT1_SRGB = 50,
DXT23_SRGB = 51,
DXT45_SRGB = 52,
BC7U_SRGB = 53,
R4G4B4A4U = 54,
ASTC_2D_4X4_SRGB = 55,
ASTC_2D_8X8_SRGB = 56,
ASTC_2D_8X5_SRGB = 57,
ASTC_2D_5X4_SRGB = 58,
ASTC_2D_5X5 = 59,
ASTC_2D_5X5_SRGB = 60,
ASTC_2D_10X8 = 61,
ASTC_2D_10X8_SRGB = 62,
ASTC_2D_6X6 = 63,
ASTC_2D_6X6_SRGB = 64,
ASTC_2D_10X10 = 65,
ASTC_2D_10X10_SRGB = 66,
ASTC_2D_12X12 = 67,
ASTC_2D_12X12_SRGB = 68,
ASTC_2D_8X6 = 69,
ASTC_2D_8X6_SRGB = 70,
ASTC_2D_6X5 = 71,
ASTC_2D_6X5_SRGB = 72,
E5B9G9R9F = 73,
MaxColorFormat,
// Depth formats
Z32F = 73,
Z16 = 74,
Z32F = 74,
Z16 = 75,
MaxDepthFormat,
// DepthStencil formats
Z24S8 = 75,
S8Z24 = 76,
Z32FS8 = 77,
Z24S8 = 76,
S8Z24 = 77,
Z32FS8 = 78,
MaxDepthStencilFormat,
@@ -138,6 +139,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
0, // R8UI
0, // RGBA16F
0, // RGBA16U
0, // RGBA16S
0, // RGBA16UI
0, // R11FG11FB10F
0, // RGBA32UI
@@ -235,6 +237,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
1, // R8UI
1, // RGBA16F
1, // RGBA16U
1, // RGBA16S
1, // RGBA16UI
1, // R11FG11FB10F
1, // RGBA32UI
@@ -324,6 +327,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
1, // R8UI
1, // RGBA16F
1, // RGBA16U
1, // RGBA16S
1, // RGBA16UI
1, // R11FG11FB10F
1, // RGBA32UI
@@ -413,6 +417,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
8, // R8UI
64, // RGBA16F
64, // RGBA16U
64, // RGBA16S
64, // RGBA16UI
32, // R11FG11FB10F
128, // RGBA32UI
@@ -517,6 +522,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table
SurfaceCompression::None, // R8UI
SurfaceCompression::None, // RGBA16F
SurfaceCompression::None, // RGBA16U
SurfaceCompression::None, // RGBA16S
SurfaceCompression::None, // RGBA16UI
SurfaceCompression::None, // R11FG11FB10F
SurfaceCompression::None, // RGBA32UI

View File

@@ -41,7 +41,7 @@ struct Table {
ComponentType alpha_component;
bool is_srgb;
};
constexpr std::array<Table, 75> DefinitionTable = {{
constexpr std::array<Table, 76> DefinitionTable = {{
{TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
{TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
{TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
@@ -61,6 +61,7 @@ constexpr std::array<Table, 75> DefinitionTable = {{
{TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U},
{TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S},
{TextureFormat::R16_G16_B16_A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RGBA16S},
{TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U},
{TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F},
{TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI},

View File

@@ -113,8 +113,10 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
params.height = tic.Height();
params.depth = tic.Depth();
params.pitch = params.is_tiled ? 0 : tic.Pitch();
if (params.target == SurfaceTarget::TextureCubemap ||
params.target == SurfaceTarget::TextureCubeArray) {
if (params.target == SurfaceTarget::Texture2D && params.depth > 1) {
params.depth = 1;
} else if (params.target == SurfaceTarget::TextureCubemap ||
params.target == SurfaceTarget::TextureCubeArray) {
params.depth *= 6;
}
params.num_levels = tic.max_mip_level + 1;

View File

@@ -104,6 +104,11 @@ public:
if (!cache_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
if (!IsTypeCompatible(tic.texture_type, entry)) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
if (guard_samplers) {
@@ -914,13 +919,15 @@ private:
params.width = 1;
params.height = 1;
params.depth = 1;
if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) {
params.depth = 6;
}
params.pitch = 4;
params.num_levels = 1;
params.emulated_levels = 1;
params.pixel_format = VideoCore::Surface::PixelFormat::RGBA16F;
params.pixel_format = VideoCore::Surface::PixelFormat::R8U;
params.type = VideoCore::Surface::SurfaceType::ColorTexture;
auto surface = CreateSurface(0ULL, params);
invalid_memory.clear();
invalid_memory.resize(surface->GetHostSizeInBytes(), 0U);
surface->UploadTexture(invalid_memory);
surface->MarkAsModified(false, Tick());
@@ -1082,6 +1089,36 @@ private:
return siblings_table[static_cast<std::size_t>(format)];
}
/// Returns true the shader sampler entry is compatible with the TIC texture type.
static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type,
const VideoCommon::Shader::Sampler& entry) {
const auto shader_type = entry.GetType();
switch (tic_type) {
case Tegra::Texture::TextureType::Texture1D:
case Tegra::Texture::TextureType::Texture1DArray:
return shader_type == Tegra::Shader::TextureType::Texture1D;
case Tegra::Texture::TextureType::Texture1DBuffer:
// TODO(Rodrigo): Assume as valid for now
return true;
case Tegra::Texture::TextureType::Texture2D:
case Tegra::Texture::TextureType::Texture2DNoMipmap:
return shader_type == Tegra::Shader::TextureType::Texture2D;
case Tegra::Texture::TextureType::Texture2DArray:
return shader_type == Tegra::Shader::TextureType::Texture2D ||
shader_type == Tegra::Shader::TextureType::TextureCube;
case Tegra::Texture::TextureType::Texture3D:
return shader_type == Tegra::Shader::TextureType::Texture3D;
case Tegra::Texture::TextureType::TextureCubeArray:
case Tegra::Texture::TextureType::TextureCubemap:
if (shader_type == Tegra::Shader::TextureType::TextureCube) {
return true;
}
return shader_type == Tegra::Shader::TextureType::Texture2D && entry.IsArray();
}
UNREACHABLE();
return true;
}
struct FramebufferTargetInfo {
TSurface target;
TView view;

View File

@@ -34,18 +34,6 @@ constexpr char PROGRESSBAR_STYLE_PREPARE[] = R"(
QProgressBar {}
QProgressBar::chunk {})";
constexpr char PROGRESSBAR_STYLE_DECOMPILE[] = R"(
QProgressBar {
background-color: black;
border: 2px solid white;
border-radius: 4px;
padding: 2px;
}
QProgressBar::chunk {
background-color: #0ab9e6;
width: 1px;
})";
constexpr char PROGRESSBAR_STYLE_BUILD[] = R"(
QProgressBar {
background-color: black;
@@ -100,13 +88,11 @@ LoadingScreen::LoadingScreen(QWidget* parent)
stage_translations = {
{VideoCore::LoadCallbackStage::Prepare, tr("Loading...")},
{VideoCore::LoadCallbackStage::Decompile, tr("Preparing Shaders %1 / %2")},
{VideoCore::LoadCallbackStage::Build, tr("Loading Shaders %1 / %2")},
{VideoCore::LoadCallbackStage::Complete, tr("Launching...")},
};
progressbar_style = {
{VideoCore::LoadCallbackStage::Prepare, PROGRESSBAR_STYLE_PREPARE},
{VideoCore::LoadCallbackStage::Decompile, PROGRESSBAR_STYLE_DECOMPILE},
{VideoCore::LoadCallbackStage::Build, PROGRESSBAR_STYLE_BUILD},
{VideoCore::LoadCallbackStage::Complete, PROGRESSBAR_STYLE_COMPLETE},
};
@@ -192,8 +178,7 @@ void LoadingScreen::OnLoadProgress(VideoCore::LoadCallbackStage stage, std::size
}
// update labels and progress bar
if (stage == VideoCore::LoadCallbackStage::Decompile ||
stage == VideoCore::LoadCallbackStage::Build) {
if (stage == VideoCore::LoadCallbackStage::Build) {
ui->stage->setText(stage_translations[stage].arg(value).arg(total));
} else {
ui->stage->setText(stage_translations[stage]);