Compare commits

...

9 Commits

Author SHA1 Message Date
ReinUsesLisp
9a8c1745f1 gl_shader_decompiler: Implement image binding settings 2019-05-16 20:03:51 -03:00
ReinUsesLisp
f96d50165f shader: Implement bindless images 2019-05-16 20:03:51 -03:00
ReinUsesLisp
f9f541470e shader: Decode SUST and implement backing image functionality 2019-05-16 20:03:51 -03:00
ReinUsesLisp
ce691745dc gl_rasterizer: Track texture buffer usage 2019-05-16 20:03:51 -03:00
ReinUsesLisp
1d59af8f7c video_core: Make ARB_buffer_storage a required extension 2019-05-16 20:03:50 -03:00
ReinUsesLisp
a6252257eb gl_rasterizer_cache: Use texture buffers to emulate texture buffers 2019-05-16 20:03:50 -03:00
ReinUsesLisp
dc5e5ac3b0 maxwell_3d: Partially implement texture buffers as 1D textures 2019-05-16 18:55:20 -03:00
ReinUsesLisp
4f612052b2 gl_shader_decompiler: Allow 1D textures to be texture buffers 2019-05-16 18:55:20 -03:00
ReinUsesLisp
89eef17670 shader: Implement texture buffers 2019-05-16 18:55:20 -03:00
30 changed files with 668 additions and 123 deletions

View File

@@ -70,6 +70,7 @@ set(HASH_FILES
"${VIDEO_CORE}/shader/decode/half_set.cpp"
"${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/hfma2.cpp"
"${VIDEO_CORE}/shader/decode/image.cpp"
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/memory.cpp"

View File

@@ -44,6 +44,7 @@ add_custom_command(OUTPUT scm_rev.cpp
"${VIDEO_CORE}/shader/decode/half_set.cpp"
"${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/hfma2.cpp"
"${VIDEO_CORE}/shader/decode/image.cpp"
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/memory.cpp"

View File

@@ -89,6 +89,7 @@ add_library(video_core STATIC
shader/decode/conversion.cpp
shader/decode/memory.cpp
shader/decode/texture.cpp
shader/decode/image.cpp
shader/decode/float_set_predicate.cpp
shader/decode/integer_set_predicate.cpp
shader/decode/half_set_predicate.cpp

View File

@@ -432,14 +432,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
Texture::TICEntry tic_entry;
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
"TIC versions other than BlockLinear or Pitch are unimplemented");
const auto r_type = tic_entry.r_type.Value();
const auto g_type = tic_entry.g_type.Value();
const auto b_type = tic_entry.b_type.Value();
const auto a_type = tic_entry.a_type.Value();
const auto r_type{tic_entry.r_type.Value()};
const auto g_type{tic_entry.g_type.Value()};
const auto b_type{tic_entry.b_type.Value()};
const auto a_type{tic_entry.a_type.Value()};
// TODO(Subv): Different data types for separate components are not supported
ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);

View File

@@ -122,6 +122,15 @@ union Sampler {
u64 value{};
};
// Image operand encoding: a raw 64-bit value with a bit-field view of the image index.
union Image {
Image() = default;
// Wraps an already-encoded raw value.
constexpr explicit Image(u64 value) : value{value} {}
// Image index; going by the other BitField uses in this file, 13 bits starting at bit 36.
BitField<36, 13, u64> index;
// Raw value that `index` aliases.
u64 value;
};
} // namespace Tegra::Shader
namespace std {
@@ -340,6 +349,26 @@ enum class TextureMiscMode : u64 {
PTP,
};
// Data mode of a surface store, read from the one-bit `mode` field of `sust`.
// The `sust` accessors assert P before reading the component mask selector and D_BA before
// reading the store data layout.
enum class SurfaceDataMode : u64 {
P = 0,
D_BA = 1,
};
// Value of the two-bit `out_of_bounds_store` field of `sust`.
// NOTE(review): the names suggest the policy applied to stores that fall outside the surface;
// nothing visible here consumes the value, so confirm against the hardware documentation.
enum class OutOfBoundsStore : u64 {
Ignore = 0,
Clamp = 1,
Trap = 2,
};
// Kind of image addressed by a surface instruction, read from the three-bit `image_type`
// field of `sust`.
enum class ImageType : u64 {
Texture1D = 0,
TextureBuffer = 1,
Texture1DArray = 2,
Texture2D = 3,
Texture2DArray = 4,
Texture3D = 5,
};
enum class IsberdMode : u64 {
None = 0,
Patch = 1,
@@ -394,7 +423,7 @@ enum class LmemLoadCacheManagement : u64 {
CV = 3,
};
enum class LmemStoreCacheManagement : u64 {
enum class StoreCacheManagement : u64 {
Default = 0,
CG = 1,
CS = 2,
@@ -796,7 +825,7 @@ union Instruction {
} ld_l;
union {
BitField<44, 2, LmemStoreCacheManagement> cache_management;
BitField<44, 2, StoreCacheManagement> cache_management;
} st_l;
union {
@@ -1207,6 +1236,20 @@ union Instruction {
}
} texs;
// Bit-field view of the TLD (texture load) instruction.
union {
    BitField<28, 1, u64> is_array;
    BitField<29, 2, TextureType> texture_type;
    BitField<35, 1, u64> aoffi;
    BitField<49, 1, u64> nodep_flag;
    BitField<50, 1, u64> ms; // Multisample?
    BitField<54, 1, u64> cl;
    BitField<55, 1, u64> process_mode;

    /// Translates the one-bit process_mode field: zero selects LZ, anything else selects LL.
    TextureProcessMode GetTextureProcessMode() const {
        if (process_mode == 0) {
            return TextureProcessMode::LZ;
        }
        return TextureProcessMode::LL;
    }
} tld;
union {
BitField<49, 1, u64> nodep_flag;
BitField<53, 4, u64> texture_info;
@@ -1256,6 +1299,35 @@ union Instruction {
}
} tlds;
union {
BitField<24, 2, StoreCacheManagement> cache_management;
BitField<33, 3, ImageType> image_type;
BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
BitField<51, 1, u64> is_immediate;
BitField<52, 1, SurfaceDataMode> mode;
BitField<20, 3, StoreType> store_data_layout;
BitField<20, 4, u64> component_mask_selector;
bool IsComponentEnabled(std::size_t component) const {
ASSERT(mode == SurfaceDataMode::P);
constexpr u8 R = 0b0001;
constexpr u8 G = 0b0010;
constexpr u8 B = 0b0100;
constexpr u8 A = 0b1000;
constexpr std::array<u8, 16> mask = {
0, (R), (G), (R | G), (B), (R | B),
(G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A),
(B | A), (R | B | A), (G | B | A), (R | G | B | A)};
return std::bitset<4>{mask.at(component_mask_selector)}.test(component);
}
StoreType GetStoreDataLayout() const {
ASSERT(mode == SurfaceDataMode::D_BA);
return store_data_layout;
}
} sust;
union {
BitField<20, 24, u64> target;
BitField<5, 1, u64> constant_buffer;
@@ -1347,6 +1419,7 @@ union Instruction {
Attribute attribute;
Sampler sampler;
Image image;
u64 value;
};
@@ -1381,11 +1454,13 @@ public:
TXQ, // Texture Query
TXQ_B, // Texture Query Bindless
TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
TLD, // Texture Load
TLDS, // Texture Load with scalar/non-vec4 source/destinations
TLD4, // Texture Load 4
TLD4S, // Texture Load 4 with scalar/non-vec4 source/destinations
TMML_B, // Texture Mip Map Level
TMML, // Texture Mip Map Level
SUST, // Surface Store
EXIT,
IPA,
OUT_R, // Emit vertex/primitive
@@ -1516,6 +1591,7 @@ public:
Synch,
Memory,
Texture,
Image,
FloatSet,
FloatSetPredicate,
IntegerSet,
@@ -1651,11 +1727,13 @@ private:
INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"),
INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),

View File

@@ -24,6 +24,11 @@ Device::Device() {
has_variable_aoffi = TestVariableAoffi();
}
Device::Device(std::nullptr_t) {
uniform_buffer_alignment = 0;
has_variable_aoffi = true;
}
bool Device::TestVariableAoffi() {
const GLchar* AOFFI_TEST = R"(#version 430 core
uniform sampler2D tex;

View File

@@ -10,7 +10,8 @@ namespace OpenGL {
class Device {
public:
Device();
explicit Device();
explicit Device(std::nullptr_t);
std::size_t GetUniformBufferAlignment() const {
return uniform_buffer_alignment;

View File

@@ -29,8 +29,10 @@
namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using PixelFormat = VideoCore::Surface::PixelFormat;
using SurfaceType = VideoCore::Surface::SurfaceType;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
@@ -119,11 +121,6 @@ void RasterizerOpenGL::CheckExtensions() {
Render_OpenGL,
"Anisotropic filter is not supported! This can cause graphical issues in some games.");
}
if (!GLAD_GL_ARB_buffer_storage) {
LOG_WARNING(
Render_OpenGL,
"Buffer storage control is not supported! This can cause performance degradation.");
}
}
GLuint RasterizerOpenGL::SetupVertexFormat() {
@@ -323,8 +320,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
static_cast<GLsizeiptr>(sizeof(ubo)));
Shader shader{shader_cache.GetStageProgram(program)};
const auto [program_handle, next_bindings] =
shader->GetProgramHandle(primitive_mode, base_bindings);
const auto stage_enum{static_cast<Maxwell::ShaderStage>(stage)};
SetupConstBuffers(stage_enum, shader, base_bindings);
SetupGlobalRegions(stage_enum, shader, base_bindings);
const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)};
const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant);
switch (program) {
case Maxwell::ShaderProgram::VertexA:
@@ -342,11 +345,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
shader_config.enable.Value(), shader_config.offset);
}
const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
SetupConstBuffers(stage_enum, shader, program_handle, base_bindings);
SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings);
SetupTextures(stage_enum, shader, program_handle, base_bindings);
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
@@ -809,8 +807,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
}
void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, GLuint program_handle,
BaseBindings base_bindings) {
const Shader& shader, BaseBindings base_bindings) {
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& gpu = system.GPU();
const auto& maxwell3d = gpu.Maxwell3D();
@@ -857,8 +854,7 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
}
void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, GLenum primitive_mode,
BaseBindings base_bindings) {
const Shader& shader, BaseBindings base_bindings) {
const auto& entries = shader->GetShaderEntries().global_memory_entries;
for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry{entries[bindpoint]};
@@ -871,8 +867,8 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade
}
}
void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
GLuint program_handle, BaseBindings base_bindings) {
TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
BaseBindings base_bindings) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& gpu = system.GPU();
const auto& maxwell3d = gpu.Maxwell3D();
@@ -881,6 +877,8 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units),
"Exceeded the number of active textures.");
TextureBufferUsage texture_buffer_usage{0};
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
Tegra::Texture::FullTextureInfo texture;
@@ -894,18 +892,25 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
}
const u32 current_bindpoint = base_bindings.sampler + bindpoint;
state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc);
auto& unit{state.texture_units[current_bindpoint]};
unit.sampler = sampler_cache.GetSampler(texture.tsc);
if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) {
state.texture_units[current_bindpoint].texture =
surface->Texture(entry.IsArray()).handle;
if (surface->GetSurfaceParams().target == SurfaceTarget::TextureBuffer) {
// Record that this texture is a texture buffer.
texture_buffer_usage.set(bindpoint);
}
unit.texture = surface->Texture(entry.IsArray()).handle;
surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
texture.tic.w_source);
} else {
// Can occur when texture addr is null or its memory is unmapped/invalid
state.texture_units[current_bindpoint].texture = 0;
unit.texture = 0;
}
}
return texture_buffer_usage;
}
void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {

View File

@@ -106,16 +106,16 @@ private:
/// Configures the current constbuffers to use for the draw command.
void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
GLuint program_handle, BaseBindings base_bindings);
BaseBindings base_bindings);
/// Configures the current global memory entries to use for the draw command.
void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, GLenum primitive_mode,
BaseBindings base_bindings);
const Shader& shader, BaseBindings base_bindings);
/// Configures the current textures to use for the draw command.
void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
GLuint program_handle, BaseBindings base_bindings);
/// Configures the current textures to use for the draw command. Returns the shader's texture
/// buffer usage.
TextureBufferUsage SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, BaseBindings base_bindings);
/// Syncs the viewport and depth range to match the guest state
void SyncViewport(OpenGLState& current_state);

View File

@@ -140,7 +140,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
if (!params.is_tiled) {
if (config.tic.IsLineal()) {
params.pitch = config.tic.Pitch();
}
params.unaligned_height = config.tic.Height();
@@ -149,6 +149,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
switch (params.target) {
case SurfaceTarget::Texture1D:
case SurfaceTarget::TextureBuffer:
case SurfaceTarget::Texture2D:
params.depth = 1;
break;
@@ -389,6 +390,8 @@ static GLenum SurfaceTargetToGL(SurfaceTarget target) {
switch (target) {
case SurfaceTarget::Texture1D:
return GL_TEXTURE_1D;
case SurfaceTarget::TextureBuffer:
return GL_TEXTURE_BUFFER;
case SurfaceTarget::Texture2D:
return GL_TEXTURE_2D;
case SurfaceTarget::Texture3D:
@@ -600,29 +603,35 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
switch (params.target) {
case SurfaceTarget::Texture1D:
glTextureStorage1D(texture.handle, params.max_mip_level, format_tuple.internal_format,
width);
glTextureStorage1D(texture.handle, params.max_mip_level, gl_internal_format, width);
break;
case SurfaceTarget::TextureBuffer:
texture_buffer.Create();
glNamedBufferStorage(texture_buffer.handle,
params.width * GetBytesPerPixel(params.pixel_format), nullptr,
GL_DYNAMIC_STORAGE_BIT);
glTextureBuffer(texture.handle, gl_internal_format, texture_buffer.handle);
break;
case SurfaceTarget::Texture2D:
case SurfaceTarget::TextureCubemap:
glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
width, height);
glTextureStorage2D(texture.handle, params.max_mip_level, gl_internal_format, width, height);
break;
case SurfaceTarget::Texture3D:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
glTextureStorage3D(texture.handle, params.max_mip_level, format_tuple.internal_format,
width, height, params.depth);
glTextureStorage3D(texture.handle, params.max_mip_level, gl_internal_format, width, height,
params.depth);
break;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(params.target));
UNREACHABLE();
glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
width, height);
glTextureStorage2D(texture.handle, params.max_mip_level, gl_internal_format, width, height);
}
ApplyTextureDefaults(texture.handle, params.max_mip_level);
if (params.target != SurfaceTarget::TextureBuffer) {
ApplyTextureDefaults(texture.handle, params.max_mip_level);
}
OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
}
@@ -785,6 +794,13 @@ void CachedSurface::UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_t
glTextureSubImage1D(texture.handle, mip_map, x0, static_cast<GLsizei>(rect.GetWidth()),
tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::TextureBuffer:
ASSERT(mip_map == 0);
glNamedBufferSubData(texture_buffer.handle, x0,
static_cast<GLsizeiptr>(rect.GetWidth()) *
GetBytesPerPixel(params.pixel_format),
&gl_buffer[mip_map][buffer_offset]);
break;
case SurfaceTarget::Texture2D:
glTextureSubImage2D(texture.handle, mip_map, x0, y0,
static_cast<GLsizei>(rect.GetWidth()),
@@ -860,6 +876,9 @@ void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
Tegra::Texture::SwizzleSource swizzle_y,
Tegra::Texture::SwizzleSource swizzle_z,
Tegra::Texture::SwizzleSource swizzle_w) {
if (params.target == SurfaceTarget::TextureBuffer) {
return;
}
const GLenum new_x = MaxwellToGL::SwizzleSource(swizzle_x);
const GLenum new_y = MaxwellToGL::SwizzleSource(swizzle_y);
const GLenum new_z = MaxwellToGL::SwizzleSource(swizzle_z);

View File

@@ -250,6 +250,8 @@ struct SurfaceParams {
switch (target) {
case SurfaceTarget::Texture1D:
return "1D";
case SurfaceTarget::TextureBuffer:
return "Buffer";
case SurfaceTarget::Texture2D:
return "2D";
case SurfaceTarget::Texture3D:
@@ -439,6 +441,7 @@ private:
OGLTexture texture;
OGLTexture discrepant_view;
OGLBuffer texture_buffer;
SurfaceParams params{};
GLenum gl_target{};
GLenum gl_internal_format{};

View File

@@ -164,8 +164,12 @@ GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgr
}
CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
Maxwell::ShaderProgram program_type, BaseBindings base_bindings,
GLenum primitive_mode, bool hint_retrievable = false) {
Maxwell::ShaderProgram program_type, const ProgramVariant& variant,
bool hint_retrievable = false) {
auto base_bindings{variant.base_bindings};
const auto primitive_mode{variant.primitive_mode};
const auto texture_buffer_usage{variant.texture_buffer_usage};
std::string source = "#version 430 core\n";
source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
@@ -181,6 +185,18 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
base_bindings.sampler++);
}
for (const auto& image : entries.images) {
source +=
fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++);
}
// Declare which 1D samplers are actually texture buffers by defining SAMPLER_<n>_IS_BUFFER
// for every bit set in texture_buffer_usage.
for (std::size_t i = 0; i < texture_buffer_usage.size(); ++i) {
    if (texture_buffer_usage.test(i)) {
        // Every directive must end its own line; without the newline the next text appended to
        // `source` would become part of this macro's body.
        source += fmt::format("#define SAMPLER_{}_IS_BUFFER\n", i);
    }
}
if (program_type == Maxwell::ShaderProgram::Geometry) {
const auto [glsl_topology, debug_name, max_vertices] =
@@ -256,20 +272,18 @@ CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
shader_length = entries.shader_length;
}
std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode,
BaseBindings base_bindings) {
std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
GLuint handle{};
if (program_type == Maxwell::ShaderProgram::Geometry) {
handle = GetGeometryShader(primitive_mode, base_bindings);
handle = GetGeometryShader(variant);
} else {
const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings);
const auto [entry, is_cache_miss] = programs.try_emplace(variant);
auto& program = entry->second;
if (is_cache_miss) {
program = TryLoadProgram(primitive_mode, base_bindings);
program = TryLoadProgram(variant);
if (!program) {
program =
SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
program = SpecializeShader(code, entries, program_type, variant);
disk_cache.SaveUsage(GetUsage(variant));
}
LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
@@ -278,6 +292,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
handle = program->handle;
}
auto base_bindings{variant.base_bindings};
base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS;
base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
base_bindings.sampler += static_cast<u32>(entries.samplers.size());
@@ -285,43 +300,42 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
return {handle, base_bindings};
}
GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) {
const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings);
GLuint CachedShader::GetGeometryShader(const ProgramVariant& variant) {
const auto [entry, is_cache_miss] = geometry_programs.try_emplace(variant);
auto& programs = entry->second;
switch (primitive_mode) {
switch (variant.primitive_mode) {
case GL_POINTS:
return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.points, variant);
case GL_LINES:
case GL_LINE_STRIP:
return LazyGeometryProgram(programs.lines, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.lines, variant);
case GL_LINES_ADJACENCY:
case GL_LINE_STRIP_ADJACENCY:
return LazyGeometryProgram(programs.lines_adjacency, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.lines_adjacency, variant);
case GL_TRIANGLES:
case GL_TRIANGLE_STRIP:
case GL_TRIANGLE_FAN:
return LazyGeometryProgram(programs.triangles, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.triangles, variant);
case GL_TRIANGLES_ADJACENCY:
case GL_TRIANGLE_STRIP_ADJACENCY:
return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.triangles_adjacency, variant);
default:
UNREACHABLE_MSG("Unknown primitive mode.");
return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
return LazyGeometryProgram(programs.points, variant);
}
}
GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
GLenum primitive_mode) {
GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program,
const ProgramVariant& variant) {
if (target_program) {
return target_program->handle;
}
const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(primitive_mode);
target_program = TryLoadProgram(primitive_mode, base_bindings);
const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(variant.primitive_mode);
target_program = TryLoadProgram(variant);
if (!target_program) {
target_program =
SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
target_program = SpecializeShader(code, entries, program_type, variant);
disk_cache.SaveUsage(GetUsage(variant));
}
LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name);
@@ -329,18 +343,19 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind
return target_program->handle;
};
CachedProgram CachedShader::TryLoadProgram(GLenum primitive_mode,
BaseBindings base_bindings) const {
const auto found = precompiled_programs.find(GetUsage(primitive_mode, base_bindings));
/// Looks up the precompiled program stored under this shader's usage for `variant`.
/// Returns a default-constructed CachedProgram when there is none.
CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const {
    if (const auto it = precompiled_programs.find(GetUsage(variant));
        it != precompiled_programs.end()) {
        return it->second;
    }
    return {};
}
ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
BaseBindings base_bindings) const {
return {unique_identifier, base_bindings, primitive_mode};
/// Builds the disk cache usage entry for this shader and the given program variant.
ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const {
    ShaderDiskCacheUsage result;
    result.variant = variant;
    result.unique_identifier = unique_identifier;
    return result;
}
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -394,7 +409,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
}
if (!shader) {
shader = SpecializeShader(unspec.code, unspec.entries, unspec.program_type,
usage.bindings, usage.primitive, true);
usage.variant, true);
}
precompiled_programs.insert({usage, std::move(shader)});

View File

@@ -6,6 +6,7 @@
#include <array>
#include <atomic>
#include <bitset>
#include <memory>
#include <set>
#include <tuple>
@@ -22,7 +23,7 @@
namespace Core {
class System;
} // namespace Core
}
namespace OpenGL {
@@ -63,8 +64,7 @@ public:
}
/// Gets the GL program handle for the shader
std::tuple<GLuint, BaseBindings> GetProgramHandle(GLenum primitive_mode,
BaseBindings base_bindings);
std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);
private:
// Geometry programs. These are needed because GLSL needs an input topology but it's not
@@ -78,15 +78,14 @@ private:
CachedProgram triangles_adjacency;
};
GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings);
GLuint GetGeometryShader(const ProgramVariant& variant);
/// Generates a geometry shader or returns one that already exists.
GLuint LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
GLenum primitive_mode);
GLuint LazyGeometryProgram(CachedProgram& target_program, const ProgramVariant& variant);
CachedProgram TryLoadProgram(GLenum primitive_mode, BaseBindings base_bindings) const;
CachedProgram TryLoadProgram(const ProgramVariant& variant) const;
ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const;
u8* host_ptr{};
VAddr cpu_addr{};
@@ -100,8 +99,8 @@ private:
std::string code;
std::unordered_map<BaseBindings, CachedProgram> programs;
std::unordered_map<BaseBindings, GeometryPrograms> geometry_programs;
std::unordered_map<ProgramVariant, CachedProgram> programs;
std::unordered_map<ProgramVariant, GeometryPrograms> geometry_programs;
std::unordered_map<u32, GLuint> cbuf_resource_cache;
std::unordered_map<u32, GLuint> gmem_resource_cache;

View File

@@ -152,6 +152,7 @@ public:
DeclareConstantBuffers();
DeclareGlobalMemory();
DeclareSamplers();
DeclareImages();
code.AddLine("void execute_" + suffix + "() {");
++code.scope;
@@ -204,6 +205,9 @@ public:
for (const auto& sampler : ir.GetSamplers()) {
entries.samplers.emplace_back(sampler);
}
for (const auto& image : ir.GetImages()) {
entries.images.emplace_back(image);
}
for (const auto& gmem_pair : ir.GetGlobalMemory()) {
const auto& [base, usage] = gmem_pair;
entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset,
@@ -402,9 +406,13 @@ private:
void DeclareSamplers() {
const auto& samplers = ir.GetSamplers();
for (const auto& sampler : samplers) {
const std::string name{GetSampler(sampler)};
const std::string description{"layout (binding = SAMPLER_BINDING_" +
std::to_string(sampler.GetIndex()) + ") uniform "};
std::string sampler_type = [&]() {
switch (sampler.GetType()) {
case Tegra::Shader::TextureType::Texture1D:
// Special cased, read below.
return "sampler1D";
case Tegra::Shader::TextureType::Texture2D:
return "sampler2D";
@@ -422,13 +430,54 @@ private:
if (sampler.IsShadow())
sampler_type += "Shadow";
code.AddLine("layout (binding = SAMPLER_BINDING_" + std::to_string(sampler.GetIndex()) +
") uniform " + sampler_type + ' ' + GetSampler(sampler) + ';');
if (sampler.GetType() == Tegra::Shader::TextureType::Texture1D) {
// 1D textures can be aliased to texture buffers, hide the declarations behind a
// preprocessor flag and use one or the other from the GPU state. This has to be
// done because shaders don't have enough information to determine the texture type.
EmitIfdefIsBuffer(sampler);
code.AddLine(description + "samplerBuffer " + name + ';');
code.AddLine("#else");
code.AddLine(description + sampler_type + ' ' + name + ';');
code.AddLine("#endif");
} else {
// The other texture types (2D, 3D and cubes) don't have this issue.
code.AddLine(description + sampler_type + ' ' + name + ';');
}
}
if (!samplers.empty())
code.AddNewLine();
}
/// Emits one GLSL image uniform declaration for every image used by the shader IR.
/// Each declaration is bound through its IMAGE_BINDING_<index> macro and is qualified as
/// `coherent volatile writeonly`. A blank line follows when at least one image was declared.
void DeclareImages() {
    const auto& images{ir.GetImages()};
    for (const auto& image : images) {
        const std::string image_type = [&]() {
            switch (image.GetType()) {
            case Tegra::Shader::ImageType::Texture1D:
                return "image1D";
            case Tegra::Shader::ImageType::TextureBuffer:
                // The GLSL buffer image type is spelled imageBuffer; "bufferImage" is not a
                // GLSL type and would make the generated shader fail to compile.
                return "imageBuffer";
            case Tegra::Shader::ImageType::Texture1DArray:
                return "image1DArray";
            case Tegra::Shader::ImageType::Texture2D:
                return "image2D";
            case Tegra::Shader::ImageType::Texture2DArray:
                return "image2DArray";
            case Tegra::Shader::ImageType::Texture3D:
                return "image3D";
            default:
                UNREACHABLE();
                return "image1D";
            }
        }();
        code.AddLine("layout (binding = IMAGE_BINDING_" + std::to_string(image.GetIndex()) +
                     ") coherent volatile writeonly uniform " + image_type + ' ' +
                     GetImage(image) + ';');
    }
    if (!images.empty()) {
        code.AddNewLine();
    }
}
void VisitBlock(const NodeBlock& bb) {
for (const Node node : bb) {
if (const std::string expr = Visit(node); !expr.empty()) {
@@ -1313,13 +1362,61 @@ private:
else if (next < count)
expr += ", ";
}
// Store a copy of the expression without the lod to be used with texture buffers
std::string expr_buffer = expr;
if (meta->lod) {
expr += ", ";
expr += CastOperand(Visit(meta->lod), Type::Int);
}
expr += ')';
expr += GetSwizzle(meta->element);
return expr + GetSwizzle(meta->element);
expr_buffer += ')';
expr_buffer += GetSwizzle(meta->element);
const std::string tmp{code.GenerateTemporary()};
EmitIfdefIsBuffer(meta->sampler);
code.AddLine("float " + tmp + " = " + expr_buffer + ';');
code.AddLine("#else");
code.AddLine("float " + tmp + " = " + expr + ';');
code.AddLine("#endif");
return tmp;
}
/// Emits an `imageStore(image, coords, vec4(values));` statement for the operation.
/// The operands are the integer coordinates and the values come from the operation's
/// MetaImage. Only four stored values are implemented. Returns an empty string because the
/// statement is written straight to the code buffer.
std::string ImageStore(Operation operation) {
    constexpr std::array<const char*, 4> constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
    const auto meta{std::get<MetaImage>(operation.GetMeta())};

    std::string statement{"imageStore("};
    statement += GetImage(meta.image);
    statement += ", ";

    // Coordinates: wrapped in the integer constructor matching their count.
    const std::size_t num_coords{operation.GetOperandsCount()};
    statement += constructors.at(num_coords - 1);
    for (std::size_t coord = 0; coord < num_coords; ++coord) {
        if (coord != 0) {
            statement += ", ";
        }
        statement += VisitOperand(operation, coord, Type::Int);
    }
    statement += "), ";

    // Stored values: always emitted as a vec4.
    const std::size_t num_values{meta.values.size()};
    UNIMPLEMENTED_IF(num_values != 4);
    statement += "vec4(";
    bool is_first_value = true;
    for (const auto& value : meta.values) {
        if (!is_first_value) {
            statement += ", ";
        }
        is_first_value = false;
        statement += Visit(value);
    }
    statement += "));";

    code.AddLine(statement);
    return {};
}
std::string Branch(Operation operation) {
@@ -1570,6 +1667,8 @@ private:
&GLSLDecompiler::TextureQueryLod,
&GLSLDecompiler::TexelFetch,
&GLSLDecompiler::ImageStore,
&GLSLDecompiler::Branch,
&GLSLDecompiler::PushFlowStack,
&GLSLDecompiler::PopFlowStack,
@@ -1636,6 +1735,14 @@ private:
return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
}
/// Returns the GLSL identifier used for `image`, derived from its index.
std::string GetImage(const Image& image) const {
    const auto image_index = static_cast<u32>(image.GetIndex());
    return GetDeclarationWithSuffix(image_index, "image");
}
/// Opens an `#ifdef SAMPLER_<index>_IS_BUFFER` preprocessor section for `sampler`.
/// The caller is responsible for emitting the matching #else/#endif lines.
void EmitIfdefIsBuffer(const Sampler& sampler) {
    const std::string directive =
        fmt::format("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex());
    code.AddLine(directive);
}
/// Builds an identifier of the form `<name>_<index>_<suffix>`, where suffix is this
/// decompiler's own suffix member.
std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const {
    std::string declaration{name};
    declaration += '_';
    declaration += std::to_string(index);
    declaration += '_';
    declaration += suffix;
    return declaration;
}

View File

@@ -27,6 +27,7 @@ struct ShaderEntries;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using ProgramResult = std::pair<std::string, ShaderEntries>;
using SamplerEntry = VideoCommon::Shader::Sampler;
using ImageEntry = VideoCommon::Shader::Image;
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
public:
@@ -74,6 +75,7 @@ struct ShaderEntries {
std::vector<ConstBufferEntry> const_buffers;
std::vector<SamplerEntry> samplers;
std::vector<SamplerEntry> bindless_samplers;
std::vector<ImageEntry> images;
std::vector<GlobalMemoryEntry> global_memory_entries;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
std::size_t shader_length{};

View File

@@ -34,11 +34,11 @@ enum class PrecompiledEntryKind : u32 {
Dump,
};
constexpr u32 NativeVersion = 3;

// Make sure the sizes don't change by accident.
// BaseBindings holds four u32 counters (cbuf, gmem, sampler, image).
static_assert(sizeof(BaseBindings) == 16);
static_assert(sizeof(ShaderDiskCacheUsage) == 40);
namespace {
@@ -286,7 +286,6 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
if (!LoadObjectFromPrecompiled(code_size)) {
return {};
}
std::vector<u8> code(code_size);
if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
return {};
@@ -299,7 +298,6 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
if (!LoadObjectFromPrecompiled(const_buffers_count)) {
return {};
}
for (u32 i = 0; i < const_buffers_count; ++i) {
u32 max_offset{};
u32 index{};
@@ -315,7 +313,6 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
if (!LoadObjectFromPrecompiled(samplers_count)) {
return {};
}
for (u32 i = 0; i < samplers_count; ++i) {
u64 offset{};
u64 index{};
@@ -334,11 +331,28 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
is_array != 0, is_shadow != 0, is_bindless != 0);
}
u32 images_count{};
if (!LoadObjectFromPrecompiled(images_count)) {
return {};
}
for (u32 i = 0; i < images_count; ++i) {
u64 offset{};
u64 index{};
u32 type{};
u8 is_bindless{};
if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless)) {
return {};
}
entry.entries.images.emplace_back(
static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0);
}
u32 global_memory_count{};
if (!LoadObjectFromPrecompiled(global_memory_count)) {
return {};
}
for (u32 i = 0; i < global_memory_count; ++i) {
u32 cbuf_index{};
u32 cbuf_offset{};
@@ -363,7 +377,6 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
if (!LoadObjectFromPrecompiled(shader_length)) {
return {};
}
entry.entries.shader_length = static_cast<std::size_t>(shader_length);
return entry;
@@ -403,6 +416,18 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
}
}
if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) {
return false;
}
for (const auto& image : entries.images) {
if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) ||
!SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) ||
!SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0))) {
return false;
}
}
if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) {
return false;
}

View File

@@ -33,14 +33,18 @@ namespace OpenGL {
using ProgramCode = std::vector<u64>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
/// Allocated bindings used by an OpenGL shader program
using TextureBufferUsage = std::bitset<64>;
/// Allocated bindings used by an OpenGL shader program.
struct BaseBindings {
u32 cbuf{};
u32 gmem{};
u32 sampler{};
u32 image{};
/// Two binding sets are equal when all four base counters match, including `image`.
bool operator==(const BaseBindings& rhs) const {
    return std::tie(cbuf, gmem, sampler, image) ==
           std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image);
}
bool operator!=(const BaseBindings& rhs) const {
@@ -48,15 +52,29 @@ struct BaseBindings {
}
};
/// Describes how a shader is used
/// Describes the different variants a single program can be compiled as.
struct ProgramVariant {
    BaseBindings base_bindings;                ///< Base binding counters used by the variant.
    GLenum primitive_mode{};                   ///< OpenGL enum value stored for the variant.
    TextureBufferUsage texture_buffer_usage{}; ///< Per-variant texture buffer usage bits.

    /// Variants compare equal only when every member matches.
    bool operator==(const ProgramVariant& rhs) const {
        return base_bindings == rhs.base_bindings && primitive_mode == rhs.primitive_mode &&
               texture_buffer_usage == rhs.texture_buffer_usage;
    }

    /// Negation of operator==.
    bool operator!=(const ProgramVariant& rhs) const {
        return !(*this == rhs);
    }
};
/// Describes how a shader is used.
struct ShaderDiskCacheUsage {
u64 unique_identifier{};
BaseBindings bindings;
GLenum primitive{};
ProgramVariant variant;
/// Two usages are equal when both the unique identifier and the program variant match.
bool operator==(const ShaderDiskCacheUsage& rhs) const {
    return std::tie(unique_identifier, variant) ==
           std::tie(rhs.unique_identifier, rhs.variant);
}
bool operator!=(const ShaderDiskCacheUsage& rhs) const {
@@ -71,7 +89,19 @@ namespace std {
/// Hash for OpenGL::BaseBindings that mixes all four counters, including `image`.
template <>
struct hash<OpenGL::BaseBindings> {
    std::size_t operator()(const OpenGL::BaseBindings& bindings) const {
        // Widen each counter before shifting, then XOR each one in at its own byte offset.
        return static_cast<std::size_t>(bindings.cbuf) ^
               (static_cast<std::size_t>(bindings.gmem) << 8) ^
               (static_cast<std::size_t>(bindings.sampler) << 16) ^
               (static_cast<std::size_t>(bindings.image) << 24);
    }
};
/// Hash for OpenGL::ProgramVariant combining the hashes of its three members with XOR.
template <>
struct hash<OpenGL::ProgramVariant> {
    std::size_t operator()(const OpenGL::ProgramVariant& variant) const {
        const std::size_t bindings_hash{std::hash<OpenGL::BaseBindings>()(variant.base_bindings)};
        const std::size_t usage_hash{
            std::hash<OpenGL::TextureBufferUsage>()(variant.texture_buffer_usage)};
        const std::size_t mode_bits{static_cast<std::size_t>(variant.primitive_mode) << 6};
        return bindings_hash ^ usage_hash ^ mode_bits;
    }
};
@@ -79,7 +109,7 @@ template <>
/// Hash for OpenGL::ShaderDiskCacheUsage: the unique identifier XORed with the variant hash.
struct hash<OpenGL::ShaderDiskCacheUsage> {
    std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const {
        return static_cast<std::size_t>(usage.unique_identifier) ^
               std::hash<OpenGL::ProgramVariant>()(usage.variant);
    }
};
@@ -264,16 +294,17 @@ private:
return LoadArrayFromPrecompiled(&object, 1);
}
// Core system
Core::System& system;
// Stored transferable shaders
std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
// Stores the whole precompiled cache, which will be read from or saved to the precompiled cache
// file
FileSys::VectorVfsFile precompiled_cache_virtual_file;
// Stores the current offset of the precompiled cache file for IO purposes
std::size_t precompiled_cache_virtual_file_offset;
// Stored transferable shaders
std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
// The cache has been loaded at boot
bool tried_to_load{};
};

View File

@@ -15,7 +15,8 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
namespace OpenGL {
OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent)
OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent,
bool use_persistent)
: buffer_size(size) {
gl_buffer.Create();
@@ -29,7 +30,7 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p
allocate_size *= 2;
}
if (GLAD_GL_ARB_buffer_storage) {
if (use_persistent) {
persistent = true;
coherent = prefer_coherent;
const GLbitfield flags =

View File

@@ -13,7 +13,8 @@ namespace OpenGL {
class OGLStreamBuffer : private NonCopyable {
public:
explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false);
explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false,
bool use_persistent = true);
~OGLStreamBuffer();
GLuint GetHandle() const;

View File

@@ -472,7 +472,6 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
}
}
/// Initialize the renderer
bool RendererOpenGL::Init() {
Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window};

View File

@@ -930,6 +930,11 @@ private:
return {};
}
/// Image stores are not handled by this decompiler yet: the operation is reported as
/// unimplemented and an empty id is returned.
Id ImageStore(Operation operation) {
    UNIMPLEMENTED();
    return Id{};
}
Id Branch(Operation operation) {
const auto target = std::get_if<ImmediateNode>(operation[0]);
UNIMPLEMENTED_IF(!target);
@@ -1282,6 +1287,8 @@ private:
&SPIRVDecompiler::TextureQueryLod,
&SPIRVDecompiler::TexelFetch,
&SPIRVDecompiler::ImageStore,
&SPIRVDecompiler::Branch,
&SPIRVDecompiler::PushFlowStack,
&SPIRVDecompiler::PopFlowStack,

View File

@@ -168,6 +168,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
{OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
{OpCode::Type::Memory, &ShaderIR::DecodeMemory},
{OpCode::Type::Texture, &ShaderIR::DecodeTexture},
{OpCode::Type::Image, &ShaderIR::DecodeImage},
{OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
{OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
{OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},

View File

@@ -0,0 +1,115 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
namespace {

/// Returns how many coordinate operands an image of the given type takes.
/// Unknown types hit UNREACHABLE and fall back to a single coordinate.
std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
    using Tegra::Shader::ImageType;

    if (image_type == ImageType::Texture1D || image_type == ImageType::TextureBuffer) {
        return 1;
    }
    if (image_type == ImageType::Texture1DArray || image_type == ImageType::Texture2D) {
        return 2;
    }
    if (image_type == ImageType::Texture2DArray || image_type == ImageType::Texture3D) {
        return 3;
    }
    UNREACHABLE();
    return 1;
}

} // Anonymous namespace
/// Decodes the image instruction located at `pc` and appends the generated nodes to `bb`.
/// Only SUST is handled; any other opcode is reported as unimplemented.
/// @returns `pc`, unchanged.
u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    switch (opcode->get().GetId()) {
    case OpCode::Id::SUST: {
        UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P);
        UNIMPLEMENTED_IF(instr.sust.image_type == Tegra::Shader::ImageType::TextureBuffer);
        UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore);
        UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store

        // The four stored components are read from consecutive registers starting at gpr0.
        constexpr std::size_t hardcoded_size{4};
        std::vector<Node> values;
        values.reserve(hardcoded_size);
        for (std::size_t i = 0; i < hardcoded_size; ++i) {
            values.push_back(GetRegister(instr.gpr0.Value() + i));
        }

        // Coordinates are read from consecutive registers starting at gpr8.
        const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)};
        std::vector<Node> coords;
        coords.reserve(num_coords);
        for (std::size_t i = 0; i < num_coords; ++i) {
            coords.push_back(GetRegister(instr.gpr8.Value() + i));
        }

        const auto type{instr.sust.image_type};
        const auto& image{instr.sust.is_immediate ? GetImage(instr.image, type)
                                                  : GetBindlessImage(instr.gpr39, type)};
        // `values` is not used again, so hand it over to the meta instead of copying it.
        MetaImage meta{image, std::move(values)};
        const Node store{Operation(OperationCode::ImageStore, meta, std::move(coords))};
        bb.push_back(store);
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName());
    }

    return pc;
}
/// Returns the Image entry registered for the immediate image index of an instruction,
/// creating and registering a new entry the first time an offset is seen.
const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
    const auto offset{static_cast<std::size_t>(image.index.Value())};

    // Reuse the mapping when this offset has been seen before.
    for (const Image& known : used_images) {
        if (known.GetOffset() == offset) {
            ASSERT(known.GetType() == type);
            return known;
        }
    }

    // First use of this offset: the new entry takes the next free index.
    const Image new_image{offset, used_images.size(), type};
    return *used_images.emplace(new_image).first;
}
/// Returns the Image entry for a bindless image whose handle is held in `reg`.
/// The register is tracked back to a constant buffer read; the constant buffer index and
/// immediate offset form the key used to find or create the entry.
const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg,
                                        Tegra::Shader::ImageType type) {
    const Node image_register{GetRegister(reg)};
    const Node base_image{
        TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};

    // Both lookups can yield null; flag that before the pointers are dereferenced.
    const auto cbuf{std::get_if<CbufNode>(base_image)};
    ASSERT(cbuf != nullptr);
    const auto cbuf_offset_imm{std::get_if<ImmediateNode>(cbuf->GetOffset())};
    ASSERT(cbuf_offset_imm != nullptr);

    const auto cbuf_offset{cbuf_offset_imm->GetValue()};
    const auto cbuf_index{cbuf->GetIndex()};
    const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};

    // If this image has already been used, return the existing mapping.
    const auto itr{std::find_if(used_images.begin(), used_images.end(),
                                [=](const Image& entry) { return entry.GetOffset() == cbuf_key; })};
    if (itr != used_images.end()) {
        ASSERT(itr->GetType() == type);
        return *itr;
    }

    // Otherwise create a new mapping for this image.
    const std::size_t next_index{used_images.size()};
    const Image entry{cbuf_index, cbuf_offset, next_index, type};
    return *used_images.emplace(entry).first;
}
} // namespace VideoCommon::Shader

View File

@@ -244,6 +244,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
}
break;
}
case OpCode::Id::TLD: {
UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
if (instr.tld.nodep_flag) {
LOG_WARNING(HW_GPU, "TLD.NODEP implementation is incomplete");
}
WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
break;
}
case OpCode::Id::TLDS: {
const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
const bool is_array{instr.tlds.IsArrayTexture()};
@@ -574,6 +586,38 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
return values;
}
/// Builds the TexelFetch operations for a TLD instruction, one per element of the result.
Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
    const auto texture_type{instr.tld.texture_type};
    const bool is_array{instr.tld.is_array};
    const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL};
    const std::size_t coord_count{GetCoordCount(texture_type)};

    // Registers starting at gpr8 hold the optional array index followed by the coordinates.
    u64 next_gpr8{instr.gpr8.Value()};
    const Node array_register{is_array ? GetRegister(next_gpr8++) : nullptr};

    std::vector<Node> coords;
    for (std::size_t remaining = coord_count; remaining != 0; --remaining) {
        coords.push_back(GetRegister(next_gpr8++));
    }

    // Registers starting at gpr20 hold the explicit LOD when it is enabled. The commented-out
    // reads keep the place of operands that are not consumed here.
    u64 next_gpr20{instr.gpr20.Value()};
    // const Node bindless_register{is_bindless ? GetRegister(next_gpr20++) : nullptr};
    const Node lod{lod_enabled ? GetRegister(next_gpr20++) : Immediate(0u)};
    // const Node aoffi_register{is_aoffi ? GetRegister(next_gpr20++) : nullptr};
    // const Node multisample{is_multisample ? GetRegister(next_gpr20++) : nullptr};

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);

    Node4 values;
    for (u32 component = 0; component < values.size(); ++component) {
        // Each operation gets its own copy of the coordinate list.
        auto component_coords = coords;
        MetaTexture meta{sampler, array_register, {}, {}, {}, lod, {}, component};
        values[component] =
            Operation(OperationCode::TexelFetch, meta, std::move(component_coords));
    }
    return values;
}
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
const std::size_t type_coord_count = GetCoordCount(texture_type);
const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;

View File

@@ -172,6 +172,8 @@ enum class OperationCode {
TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
TexelFetch, /// (MetaTexture, int[N], int) -> float4
ImageStore, /// (MetaImage, float[N] coords) -> void
Branch, /// (uint branch_target) -> void
PushFlowStack, /// (uint branch_target) -> void
PopFlowStack, /// () -> void
@@ -267,6 +269,48 @@ private:
bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
};
class Image {
public:
explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type)
: offset{offset}, index{index}, type{type}, is_bindless{false} {}
explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
Tegra::Shader::ImageType type)
: offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
is_bindless{true} {}
explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
bool is_bindless)
: offset{offset}, index{index}, type{type}, is_bindless{is_bindless} {}
std::size_t GetOffset() const {
return offset;
}
std::size_t GetIndex() const {
return index;
}
Tegra::Shader::ImageType GetType() const {
return type;
}
bool IsBindless() const {
return is_bindless;
}
bool operator<(const Image& rhs) const {
return std::tie(offset, index, type, is_bindless) <
std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless);
}
private:
std::size_t offset{};
std::size_t index{};
Tegra::Shader::ImageType type{};
bool is_bindless{};
};
class ConstBuffer {
public:
explicit ConstBuffer(u32 max_offset, bool is_indirect)
@@ -328,10 +372,15 @@ struct MetaTexture {
u32 element{};
};
/// Metadata attached to image operations such as ImageStore.
struct MetaImage {
const Image& image; ///< Image the operation targets; held by reference, not owned.
std::vector<Node> values; ///< Nodes supplying the values used by the operation.
};
inline constexpr MetaArithmetic PRECISE = {true};
inline constexpr MetaArithmetic NO_PRECISE = {false};
using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>;
using Meta = std::variant<MetaArithmetic, MetaTexture, MetaImage, Tegra::Shader::HalfType>;
/// Holds any kind of operation that can be done in the IR
class OperationNode final {
@@ -602,6 +651,10 @@ public:
return used_samplers;
}
/// Returns the set of images recorded in used_images.
const std::set<Image>& GetImages() const {
return used_images;
}
const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances()
const {
return used_clip_distances;
@@ -648,6 +701,7 @@ private:
u32 DecodeConversion(NodeBlock& bb, u32 pc);
u32 DecodeMemory(NodeBlock& bb, u32 pc);
u32 DecodeTexture(NodeBlock& bb, u32 pc);
u32 DecodeImage(NodeBlock& bb, u32 pc);
u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
@@ -767,6 +821,12 @@ private:
Tegra::Shader::TextureType type, bool is_array,
bool is_shadow);
/// Accesses an image.
const Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
/// Accesses a bindless image.
const Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
/// Extracts a sequence of bits from a node
Node BitfieldExtract(Node value, u32 offset, u32 bits);
@@ -790,6 +850,8 @@ private:
Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
bool depth_compare, bool is_array, bool is_aoffi);
Node4 GetTldCode(Tegra::Shader::Instruction instr);
Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
bool is_array);
@@ -877,6 +939,7 @@ private:
std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
std::map<u32, ConstBuffer> used_cbufs;
std::set<Sampler> used_samplers;
std::set<Image> used_images;
std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;

View File

@@ -12,6 +12,8 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t
switch (texture_type) {
case Tegra::Texture::TextureType::Texture1D:
return SurfaceTarget::Texture1D;
case Tegra::Texture::TextureType::Texture1DBuffer:
return SurfaceTarget::TextureBuffer;
case Tegra::Texture::TextureType::Texture2D:
case Tegra::Texture::TextureType::Texture2DNoMipmap:
return SurfaceTarget::Texture2D;
@@ -35,6 +37,7 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t
bool SurfaceTargetIsLayered(SurfaceTarget target) {
switch (target) {
case SurfaceTarget::Texture1D:
case SurfaceTarget::TextureBuffer:
case SurfaceTarget::Texture2D:
case SurfaceTarget::Texture3D:
return false;
@@ -53,6 +56,7 @@ bool SurfaceTargetIsLayered(SurfaceTarget target) {
bool SurfaceTargetIsArray(SurfaceTarget target) {
switch (target) {
case SurfaceTarget::Texture1D:
case SurfaceTarget::TextureBuffer:
case SurfaceTarget::Texture2D:
case SurfaceTarget::Texture3D:
case SurfaceTarget::TextureCubemap:

View File

@@ -114,6 +114,7 @@ enum class SurfaceType {
enum class SurfaceTarget {
Texture1D,
TextureBuffer,
Texture2D,
Texture3D,
Texture1DArray,

View File

@@ -172,12 +172,16 @@ struct TICEntry {
BitField<26, 1, u32> use_header_opt_control;
BitField<27, 1, u32> depth_texture;
BitField<28, 4, u32> max_mip_level;
BitField<0, 16, u32> buffer_high_width_minus_one;
};
union {
BitField<0, 16, u32> width_minus_1;
BitField<22, 1, u32> srgb_conversion;
BitField<23, 4, TextureType> texture_type;
BitField<29, 3, u32> border_size;
BitField<0, 16, u32> buffer_low_width_minus_one;
};
union {
BitField<0, 16, u32> height_minus_1;
@@ -206,7 +210,10 @@ struct TICEntry {
}
/// Returns the width stored in the descriptor.
/// One-dimensional buffer descriptors split their "width minus one" value across two 16-bit
/// fields; every other header version uses the single width_minus_1 field.
u32 Width() const {
    if (header_version != TICHeaderVersion::OneDBuffer) {
        return width_minus_1 + 1;
    }
    // Both halves encode "width minus one", so add one after recombining them.
    return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1;
}
u32 Height() const {
@@ -240,6 +247,15 @@ struct TICEntry {
header_version == TICHeaderVersion::BlockLinearColorKey;
}
/// Returns true when the header version is one of the two pitch variants.
bool IsLineal() const {
    if (header_version == TICHeaderVersion::Pitch) {
        return true;
    }
    return header_version == TICHeaderVersion::PitchColorKey;
}
/// Returns true when the header version is OneDBuffer.
bool IsBuffer() const {
return header_version == TICHeaderVersion::OneDBuffer;
}
/// Returns true when the srgb_conversion bit of the descriptor is set.
bool IsSrgbConversionEnabled() const {
return srgb_conversion != 0;
}

View File

@@ -742,6 +742,8 @@ void GMainWindow::OnDisplayTitleBars(bool show) {
QStringList GMainWindow::GetUnsupportedGLExtensions() {
QStringList unsupported_ext;
if (!GLAD_GL_ARB_buffer_storage)
unsupported_ext.append("ARB_buffer_storage");
if (!GLAD_GL_ARB_direct_state_access)
unsupported_ext.append("ARB_direct_state_access");
if (!GLAD_GL_ARB_vertex_type_10f_11f_11f_rev)

View File

@@ -142,6 +142,8 @@ void EmuWindow_SDL2::Fullscreen() {
bool EmuWindow_SDL2::SupportsRequiredGLExtensions() {
std::vector<std::string> unsupported_ext;
if (!GLAD_GL_ARB_buffer_storage)
unsupported_ext.push_back("ARB_buffer_storage");
if (!GLAD_GL_ARB_direct_state_access)
unsupported_ext.push_back("ARB_direct_state_access");
if (!GLAD_GL_ARB_vertex_type_10f_11f_11f_rev)