Compare commits

...

6 Commits

Author SHA1 Message Date
bunnei
fd98bf1339 gl_global_cache: Ensure buffer size does not exceed UBO maximum.
- Fixes crash with Xenoblade Chronicles 2.
2018-11-26 18:16:51 -05:00
bunnei
a6714f738c gl_global_cache: Optimize caching to eliminate unnecessary resource management. 2018-11-26 18:16:46 -05:00
bunnei
e2d3a428a0 gl_rasterizer: Cache global region uniform locations and refactor. 2018-11-26 18:16:25 -05:00
bunnei
a2d9e4d610 gl_global_cache: Use const reference for GetGlobalRegion argument. 2018-11-26 18:16:25 -05:00
Zach Hilman
df17b43333 gl_rasterizer: Add caching for global memory regions 2018-11-26 18:16:21 -05:00
Zach Hilman
0698f7f6e7 Preliminary implementation of LDG
Works by approximating the final address from the most recent IADD_C operation and then reading the 16 KiB that follow that address. Currently a hack/heuristic ("hackeuristic").
2018-11-26 18:09:26 -05:00
12 changed files with 377 additions and 23 deletions

View File

@@ -30,6 +30,8 @@ add_library(video_core STATIC
renderer_base.h
renderer_opengl/gl_buffer_cache.cpp
renderer_opengl/gl_buffer_cache.h
renderer_opengl/gl_global_cache.cpp
renderer_opengl/gl_global_cache.h
renderer_opengl/gl_primitive_assembler.cpp
renderer_opengl/gl_primitive_assembler.h
renderer_opengl/gl_rasterizer.cpp

View File

@@ -319,6 +319,11 @@ void Maxwell3D::DrawArrays() {
}
}
/// Strict weak ordering for global memory descriptors: compares the const buffer index first and
/// falls back to the const buffer offset when the indices are equal.
bool operator<(const Maxwell3D::GlobalMemoryDescriptor& lhs,
               const Maxwell3D::GlobalMemoryDescriptor& rhs) {
    if (lhs.cbuf_index != rhs.cbuf_index) {
        return lhs.cbuf_index < rhs.cbuf_index;
    }
    return lhs.cbuf_offset < rhs.cbuf_offset;
}
void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];

View File

@@ -5,6 +5,7 @@
#pragma once
#include <array>
#include <set>
#include <unordered_map>
#include <vector>
#include "common/assert.h"
@@ -31,6 +32,12 @@ public:
explicit Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
~Maxwell3D() = default;
/// Structure representing a global memory region.
/// A region is identified by where its descriptor lives: a const buffer slot plus an offset
/// inside that const buffer.
struct GlobalMemoryDescriptor {
u64 cbuf_index; ///< Index of the const buffer that holds the region descriptor
u64 cbuf_offset; ///< Offset of the descriptor, added to the const buffer's address
};
/// Register structure of the Maxwell3D engine.
/// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
struct Regs {
@@ -1037,6 +1044,8 @@ public:
std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
std::set<GlobalMemoryDescriptor> global_memory_uniforms;
};
State state{};
@@ -1069,6 +1078,9 @@ public:
return macro_memory;
}
std::string CreateGlobalMemoryRegion(std::tuple<u64, u64, u64> iadd_data);
std::set<std::pair<u64, u64>> ListGlobalMemoryRegions() const;
private:
void InitializeRegisterDefaults();
@@ -1123,6 +1135,9 @@ private:
void DrawArrays();
};
bool operator<(const Maxwell3D::GlobalMemoryDescriptor& lhs,
const Maxwell3D::GlobalMemoryDescriptor& rhs);
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(Maxwell3D::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")

View File

@@ -1,4 +1,4 @@
// Copyright 2018 yuzu Emulator Project
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@@ -208,6 +208,8 @@ enum class UniformType : u64 {
SignedShort = 3,
Single = 4,
Double = 5,
Quad = 6,
UnsignedQuad = 7,
};
enum class StoreType : u64 {
@@ -779,6 +781,12 @@ union Instruction {
BitField<44, 2, u64> unknown;
} st_l;
// Encoding fields read when decoding the LDG instruction.
union {
BitField<48, 3, UniformType> type; // Load type; selects how many registers are filled
BitField<46, 2, u64> cache_mode; // NOTE(review): not read by any code visible in this change
BitField<20, 24, s64> offset_immediate; // Signed immediate added to the address register
} ld_g;
union {
BitField<0, 3, u64> pred0;
BitField<3, 3, u64> pred3;

View File

@@ -0,0 +1,96 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/utils.h"
namespace OpenGL {
/// Records the region's address and size, creates the backing GL buffer object and labels it
/// with the address.
CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr(addr), size(size) {
    buffer.Create();
    LabelGLObject(GL_BUFFER, buffer.handle, this->addr);
}
/// Queries OpenGL for GL_MAX_UNIFORM_BLOCK_SIZE, the largest size usable for a uniform block.
static u32 GetMaxUniformBlockSize() {
    GLint limit = 0;
    glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &limit);
    return static_cast<u32>(limit);
}
/// Re-uploads the region from guest memory into the GL uniform buffer.
/// @param size_ Requested size; clamped to the maximum uniform block size (a critical log entry
///              is emitted when clamping happens).
void CachedGlobalRegion::Reload(u32 size_) {
    // The limit is queried once, on the first call, and reused afterwards.
    static const u32 max_size{GetMaxUniformBlockSize()};

    const bool exceeds_limit = size_ > max_size;
    size = exceeds_limit ? max_size : size_;
    if (exceeds_limit) {
        LOG_CRITICAL(HW_GPU, "Global region size {} exceeded max UBO size of {}!", size_, max_size);
    }

    glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle);
    glBufferData(GL_UNIFORM_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW);
}
/// Looks up a previously reserved region by address.
/// @returns The reserved region, or an empty pointer when nothing is reserved at that address.
/// The size argument does not take part in the lookup.
GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
    const auto entry = reserve.find(addr);
    return entry != reserve.end() ? entry->second : GlobalRegion{};
}
/// Obtains a region object for an address that is not in the cache, reusing a reserved one when
/// available, and reloads its contents with the requested size.
GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) {
    auto region = TryGetReservedGlobalRegion(addr, size);
    if (region == nullptr) {
        // Nothing reserved for this address yet: create the region and keep it for later reuse
        region = std::make_shared<CachedGlobalRegion>(addr, size);
        ReserveGlobalRegion(region);
    }

    region->Reload(size);
    return region;
}
/// Stores the region in the reserve map under its address, replacing any previous entry.
void GlobalRegionCacheOpenGL::ReserveGlobalRegion(const GlobalRegion& region) {
    reserve.insert_or_assign(region->GetAddr(), region);
}
/// Constructs the cache; the rasterizer reference is forwarded to the RasterizerCache base.
GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
: RasterizerCache{rasterizer} {}
/// Resolves a global memory descriptor to its cached region for the given shader stage.
///
/// The descriptor points into one of the stage's const buffers. A 64-bit GPU address is read from
/// that location and a 32-bit size from the 8 bytes after it. The GPU address is translated to a
/// CPU address, which is the key used for the cache lookup.
///
/// @param global_region Const buffer index/offset locating the region descriptor.
/// @param stage         Shader stage whose const buffer bindings are consulted.
/// @returns The cached region; a new one is created and registered on a cache miss.
GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
    const Tegra::Engines::Maxwell3D::GlobalMemoryDescriptor& global_region,
    Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
    auto& gpu{Core::System::GetInstance().GPU()};

    // Bind by reference: this function runs per region per draw, and copying the whole per-stage
    // info just to read one const buffer address is wasted work.
    const auto& cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};

    const auto cbuf_addr{gpu.MemoryManager().GpuToCpuAddress(
        cbufs.const_buffers[global_region.cbuf_index].address + global_region.cbuf_offset)};
    ASSERT(cbuf_addr);

    // Descriptor layout as read here: u64 GPU address at +0, u32 size at +8
    const auto actual_addr_gpu = Memory::Read64(*cbuf_addr);
    const auto size = Memory::Read32(*cbuf_addr + 8);
    const auto actual_addr{gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu)};
    ASSERT(actual_addr);

    // Look up global region in the cache based on address
    GlobalRegion region{TryGet(*actual_addr)};
    if (!region) {
        // No global region found - create a new one
        region = GetUncachedGlobalRegion(*actual_addr, size);
        Register(region);
    }

    return region;
}
} // namespace OpenGL

View File

@@ -0,0 +1,89 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <fmt/format.h>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
class RasterizerOpenGL;
class CachedGlobalRegion;
using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
/// Helper class for caching global region uniform locations
class CachedGlobalRegionUniform {
public:
    explicit CachedGlobalRegionUniform(std::size_t index) : index{index} {}

    /// Name of the uniform block associated with this global region index.
    std::string GetName() const {
        return "global_memory_region_declblock_" + std::to_string(index);
    }

    /// Hash value used as the lookup key when caching this uniform's resource index.
    u32 GetHash() const {
        // This needs to be unique from ConstBufferEntry::GetHash and SamplerEntry::GetHash
        const u32 shifted_index = static_cast<u32>(index) << 16;
        return shifted_index | 0xFFFF;
    }

private:
    std::size_t index{};
};
/// Cache object for one global memory region: it stores the region's address and size together
/// with the OpenGL buffer object the region is uploaded into.
class CachedGlobalRegion final : public RasterizerCacheObject {
public:
CachedGlobalRegion(VAddr addr, u32 size);
/// Gets the address of the global region in guest memory, required for cache management
VAddr GetAddr() const {
return addr;
}
/// Gets the size of the global region in bytes, required for cache management
std::size_t GetSizeInBytes() const {
return size;
}
/// Gets the GL handle of the buffer object backing this region
GLuint GetBufferHandle() const {
return buffer.handle;
}
/// Reloads the global region from guest memory, using size_ as the new region size
void Reload(u32 size_);
// We do not have to flush this cache as things in it are never modified by us.
void Flush() override {}
private:
VAddr addr; // Address of the region in guest memory
u32 size; // Current size of the region; updated by Reload
OGLBuffer buffer; // GL buffer object holding the uploaded contents
};
/// Rasterizer cache specialization that holds global memory regions.
class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
public:
explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);
/// Gets the global region described by the given descriptor for the specified shader stage,
/// creating and registering it when it is not cached yet
GlobalRegion GetGlobalRegion(
const Tegra::Engines::Maxwell3D::GlobalMemoryDescriptor& descriptor,
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
private:
/// Returns the reserved region at addr, or an empty pointer if none is reserved
GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
/// Returns a reloaded region for addr, reusing a reserved one or creating a new one
GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size);
/// Stores the region in the reserve map, keyed by its address
void ReserveGlobalRegion(const GlobalRegion& region);
/// Regions kept for reuse, keyed by guest address
std::unordered_map<VAddr, GlobalRegion> reserve;
};
} // namespace OpenGL

View File

@@ -81,7 +81,7 @@ struct DrawParameters {
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
: res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info},
buffer_cache(*this, STREAM_BUFFER_SIZE) {
buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
// Create sampler objects
for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
@@ -267,7 +267,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
// Next available bindpoints to use when uploading the const buffers and textures to the GLSL
// shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
u32 current_buffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
u32 current_texture_bindpoint = 0;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
@@ -321,9 +321,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
}
// Configure the const buffers for this shader stage.
current_constbuffer_bindpoint =
current_buffer_bindpoint =
SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode,
current_constbuffer_bindpoint);
current_buffer_bindpoint);
// Configure global memory regions for this shader stage.
current_buffer_bindpoint =
SetupGlobalRegions(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode,
current_buffer_bindpoint);
// Configure the textures for this shader stage.
current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
@@ -695,6 +700,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.InvalidateRegion(addr, size);
shader_cache.InvalidateRegion(addr, size);
global_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
}
@@ -919,6 +925,29 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
return current_bindpoint + static_cast<u32>(entries.size());
}
/// Binds the global memory regions used by a shader stage as uniform buffers.
///
/// Every descriptor in the Maxwell3D state is visited in order; its position in that order is the
/// index used to build the uniform block name. The region is only resolved and bound when the
/// program actually exposes that uniform block.
///
/// @param stage             The shader stage to configure buffers for.
/// @param shader            The shader object that contains the specified stage.
/// @param primitive_mode    Primitive mode used to select the program handle.
/// @param current_bindpoint The offset at which to start counting new buffer bindpoints.
/// @returns The next available bindpoint for use in the next shader stage.
u32 RasterizerOpenGL::SetupGlobalRegions(Maxwell::ShaderStage stage, Shader& shader,
                                         GLenum primitive_mode, u32 current_bindpoint) {
    const auto& maxwell3d{Core::System::GetInstance().GPU().Maxwell3D()};

    std::size_t global_region_index{};
    for (const auto& global_region : maxwell3d.state.global_memory_uniforms) {
        const GLuint block_index{
            shader->GetProgramResourceIndex(CachedGlobalRegionUniform{global_region_index})};
        ++global_region_index;

        if (block_index == GL_INVALID_INDEX) {
            // The program has no such uniform block. Skip the region lookup entirely, which
            // avoids reading guest memory (and tripping its asserts) for an unused region.
            continue;
        }

        const auto region{global_cache.GetGlobalRegion(global_region, stage)};
        glBindBufferBase(GL_UNIFORM_BUFFER, current_bindpoint, region->GetBufferHandle());
        glUniformBlockBinding(shader->GetProgramHandle(primitive_mode), block_index,
                              current_bindpoint);
        ++current_bindpoint;
    }

    return current_bindpoint;
}
u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_unit) {
MICROPROFILE_SCOPE(OpenGL_Texture);

View File

@@ -23,6 +23,7 @@
#include "video_core/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_primitive_assembler.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -118,7 +119,7 @@ private:
bool using_depth_fb = true, bool preserve_contents = true,
std::optional<std::size_t> single_color_target = {});
/*
/**
* Configures the current constbuffers to use for the draw command.
* @param stage The shader stage to configure buffers for.
* @param shader The shader object that contains the specified stage.
@@ -128,7 +129,17 @@ private:
u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_bindpoint);
/*
/**
* Configures the current global memory regions to use for the draw command.
* @param stage The shader stage to configure buffers for.
* @param shader The shader object that contains the specified stage.
* @param current_bindpoint The offset at which to start counting new buffer bindpoints.
* @returns The next available bindpoint for use in the next shader stage.
*/
u32 SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_bindpoint);
/**
* Configures the current textures to use for the draw command.
* @param stage The shader stage to configure textures for.
* @param shader The shader object that contains the specified stage.
@@ -193,6 +204,7 @@ private:
RasterizerCacheOpenGL res_cache;
ShaderCacheOpenGL shader_cache;
GlobalRegionCacheOpenGL global_cache;
Core::Frontend::EmuWindow& emu_window;

View File

@@ -98,18 +98,6 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
}
}
/// Returns the uniform block resource index for the given const buffer entry, querying OpenGL on
/// the first request and serving later requests from resource_cache.
GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) {
    const auto hash = buffer.GetHash();
    if (const auto cached = resource_cache.find(hash); cached != resource_cache.end()) {
        return cached->second;
    }

    const GLuint index{
        glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())};
    resource_cache[hash] = index;
    return index;
}
GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) {
const auto search{uniform_cache.find(sampler.GetHash())};
if (search == uniform_cache.end()) {

View File

@@ -71,7 +71,18 @@ public:
}
/// Gets the GL program resource location for the specified resource, caching as needed
GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer);
/// Returns the uniform block resource index for the given entry, caching the result.
/// @tparam T Any type providing GetHash() (cache key) and GetName() (uniform block name).
/// @param buffer Entry whose uniform block index is requested.
/// @returns The index reported by glGetProgramResourceIndex for the block name.
template <typename T>
GLuint GetProgramResourceIndex(const T& buffer) {
    // Compute the key once; it is needed for both the lookup and the insertion on a miss
    const auto hash = buffer.GetHash();

    // Hold the iterator by value rather than binding a reference to the temporary
    const auto search = resource_cache.find(hash);
    if (search != resource_cache.end()) {
        return search->second;
    }

    const GLuint index{glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK,
                                                 buffer.GetName().c_str())};
    resource_cache[hash] = index;
    return index;
}
/// Gets the GL uniform location for the specified resource, caching as needed
GLint GetUniformLocation(const GLShader::SamplerEntry& sampler);

View File

@@ -13,6 +13,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "core/core.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_header.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -585,6 +586,7 @@ public:
GenerateInputAttrs();
GenerateOutputAttrs();
GenerateConstBuffers();
GenerateGlobalRegions();
GenerateSamplers();
GenerateGeometry();
}
@@ -706,6 +708,21 @@ private:
declarations.AddNewLine();
}
/// Generates declarations for global memory regions.
/// One std140 uniform block holding a 0x400-element vec4 array is emitted per descriptor that
/// is currently registered in the Maxwell3D state.
void GenerateGlobalRegions() {
    const auto& uniforms{
        Core::System::GetInstance().GPU().Maxwell3D().state.global_memory_uniforms};

    for (std::size_t region = 0; region < uniforms.size(); ++region) {
        declarations.AddLine(
            fmt::format("layout(std140) uniform global_memory_region_declblock_{}", region));
        declarations.AddLine('{');
        declarations.AddLine(fmt::format(" vec4 global_memory_region_{}[0x400];", region));
        declarations.AddLine("};");
        declarations.AddNewLine();
    }

    declarations.AddNewLine();
}
/// Generates declarations for samplers.
void GenerateSamplers() {
const auto& samplers = GetSamplers();
@@ -1834,6 +1851,11 @@ private:
} else {
op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
GLSLRegister::Type::Integer);
if (opcode->get().GetId() == OpCode::Id::IADD_C) {
s_last_iadd = last_iadd;
last_iadd = IADDReference{instr.gpr8.Value(), instr.cbuf34.index,
instr.cbuf34.offset};
}
}
}
@@ -3126,6 +3148,72 @@ private:
shader.AddLine('}');
break;
}
case OpCode::Id::LDG: {
    // Determine number of GPRs to fill with data
    u64 count = 1;

    switch (instr.ld_g.type) {
    case Tegra::Shader::UniformType::Single:
        count = 1;
        break;
    case Tegra::Shader::UniformType::Double:
        count = 2;
        break;
    case Tegra::Shader::UniformType::Quad:
    case Tegra::Shader::UniformType::UnsignedQuad:
        count = 4;
        break;
    default:
        UNREACHABLE_MSG("Unimplemented LDG size!");
    }

    // Heuristic: the region's base address is assumed to come from the const buffer operand of
    // the most recently decoded IADD_C instruction.
    auto [gpr_index, index, offset] = last_iadd;

    // The last IADD might be the upper u32 of address, so instead take the one before
    // that.
    if (gpr_index == Register::ZeroIndex) {
        gpr_index = s_last_iadd.out;
        index = s_last_iadd.cbuf_index;
        offset = s_last_iadd.cbuf_offset;
    }

    // NOTE(review): the result is unused; the call is kept in case it has side effects - confirm
    const auto gpr = regs.GetRegisterAsInteger(gpr_index);
    const auto constbuffer =
        regs.GetUniform(index, offset, GLSLRegister::Type::UnsignedInteger);

    // Register the descriptor. The offset is multiplied by 4 before being stored.
    auto& global_regions =
        Core::System::GetInstance().GPU().Maxwell3D().state.global_memory_uniforms;
    const auto descriptor_it = global_regions.insert({index, offset * 4}).first;

    // The uniform block index must be the descriptor's position in the ordered set, because the
    // rasterizer walks the set in order when binding regions. Using size() - 1 is only right
    // when the descriptor is new and sorts last.
    // NOTE(review): descriptors inserted later can still shift this position for shaders that
    // were already generated - confirm whether that needs handling.
    std::size_t region_index = 0;
    for (auto it = global_regions.begin(); it != descriptor_it; ++it) {
        ++region_index;
    }
    const auto memory = fmt::format("global_memory_region_{}", region_index);

    const auto immediate = std::to_string(instr.ld_g.offset_immediate.Value());
    const auto o_register = regs.GetRegisterAsInteger(instr.gpr8, 0, false);
    const auto address = "( " + immediate + " + " + o_register + " )";
    const auto base_sub = address + " - " + constbuffer;

    // New scope to prevent potential conflicts
    shader.AddLine('{');
    ++shader.scope;

    shader.AddLine("uint final_offset = " + base_sub + ";");
    for (std::size_t out = 0; out < count; ++out) {
        const u64 reg_id = instr.gpr0.Value() + out;
        // Index the vec4 array: offset / 16 picks the vec4, (offset / 4) % 4 picks the component
        const auto this_memory =
            fmt::format("{}[(final_offset + {}) / 16][((final_offset + {}) / 4) % 4]",
                        memory, out * 4, out * 4);
        regs.SetRegisterToFloat(reg_id, 0, this_memory, 1, 1);
    }

    --shader.scope;
    shader.AddLine('}');
    break;
}
default: {
UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
}
@@ -3923,9 +4011,18 @@ private:
ShaderWriter declarations;
GLSLRegisterManager regs{shader, declarations, stage, suffix, header};
/// Operands recorded from a decoded IADD_C instruction; the LDG handler reads them to work out
/// which const buffer location holds the base address.
struct IADDReference {
Register out; // Register taken from the instruction's gpr8 field
u64 cbuf_index; // Const buffer index operand (cbuf34.index)
u64 cbuf_offset; // Const buffer offset operand (cbuf34.offset); multiplied by 4 when LDG uses it
};
/// Most recently decoded IADD_C
IADDReference last_iadd{};
/// The IADD_C decoded before last_iadd (the previous value of last_iadd)
IADDReference s_last_iadd{};
// Declarations
std::set<std::string> declr_predicates;
}; // namespace OpenGL::GLShader::Decompiler
};
std::string GetCommonDeclarations() {
return fmt::format("#define MAX_CONSTBUFFER_ELEMENTS {}\n",

View File

@@ -57,7 +57,8 @@ public:
}
/// Combines the shader stage and the entry index into a single integer.
// NOTE(review): this listing shows two consecutive return statements (shift by 16, then shift
// by 12); it looks like the old and new lines of a diff shown together - confirm which applies.
u32 GetHash() const {
return (static_cast<u32>(stage) << 16) | index;
// This needs to be unique from CachedGlobalRegionUniform::GetHash
return (static_cast<u32>(stage) << 12) | index;
}
private:
@@ -138,7 +139,8 @@ public:
}
/// Combines the shader stage and the sampler index into a single integer.
// NOTE(review): this listing shows two consecutive return statements (shift by 16, then shift
// by 12); it looks like the old and new lines of a diff shown together - confirm which applies.
u32 GetHash() const {
return (static_cast<u32>(stage) << 16) | static_cast<u32>(sampler_index);
// This needs to be unique from CachedGlobalRegionUniform::GetHash
return (static_cast<u32>(stage) << 12) | static_cast<u32>(sampler_index);
}
static std::string GetArrayName(Maxwell::ShaderStage stage) {