Compare commits

...

2 Commits

Author SHA1 Message Date
Zach Hilman
43215bc907 gl_rasterizer: Add caching for global memory regions 2018-10-17 18:53:42 -04:00
Zach Hilman
b54a50992c Preliminary implementation of LDG
Works by approximating the value of the final address using the last IADD_C operation and then reading 16kb following that address. Currently a hackeuristic.
2018-10-17 18:43:37 -04:00
9 changed files with 261 additions and 1 deletions

View File

@@ -27,6 +27,8 @@ add_library(video_core STATIC
renderer_base.h
renderer_opengl/gl_buffer_cache.cpp
renderer_opengl/gl_buffer_cache.h
renderer_opengl/gl_global_cache.cpp
renderer_opengl/gl_global_cache.h
renderer_opengl/gl_primitive_assembler.cpp
renderer_opengl/gl_primitive_assembler.h
renderer_opengl/gl_rasterizer.cpp

View File

@@ -246,6 +246,11 @@ void Maxwell3D::DrawArrays() {
}
}
/// Orders global memory descriptors lexicographically: by constant buffer index first and by
/// constant buffer offset second. This is the ordering used when descriptors are kept in a
/// std::set.
bool operator<(const Maxwell3D::GlobalMemoryDescriptor& lhs,
               const Maxwell3D::GlobalMemoryDescriptor& rhs) {
    // Different buffers: the index alone decides.
    if (lhs.cbuf_index != rhs.cbuf_index) {
        return lhs.cbuf_index < rhs.cbuf_index;
    }
    // Same buffer: fall back to the offset within it.
    return lhs.cbuf_offset < rhs.cbuf_offset;
}
void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];

View File

@@ -5,6 +5,7 @@
#pragma once
#include <array>
#include <set>
#include <unordered_map>
#include <vector>
#include "common/assert.h"
@@ -31,6 +32,12 @@ public:
explicit Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
~Maxwell3D() = default;
/// Structure representing a global memory region
/// The region is identified indirectly, by the constant buffer location that describes it.
struct GlobalMemoryDescriptor {
// Index into a shader stage's const_buffers array.
u64 cbuf_index;
// Offset added to that constant buffer's address to find the region's address/size record.
u64 cbuf_offset;
};
/// Register structure of the Maxwell3D engine.
/// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
struct Regs {
@@ -961,6 +968,8 @@ public:
std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
std::set<GlobalMemoryDescriptor> global_memory_uniforms;
};
State state{};
@@ -1023,6 +1032,9 @@ private:
void DrawArrays();
};
/// Less-than comparison for GlobalMemoryDescriptor (cbuf_index first, then cbuf_offset), so that
/// descriptors can be stored in ordered containers such as std::set.
bool operator<(const Maxwell3D::GlobalMemoryDescriptor& lhs,
const Maxwell3D::GlobalMemoryDescriptor& rhs);
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(Maxwell3D::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")

View File

@@ -205,6 +205,8 @@ enum class UniformType : u64 {
SignedShort = 3,
Single = 4,
Double = 5,
Quad = 6,
UnsignedQuad = 7,
};
enum class IMinMaxExchange : u64 {
@@ -658,6 +660,12 @@ union Instruction {
BitField<44, 2, u64> unknown;
} ld_c;
// Fields decoded for the LDG instruction.
union {
// Width of the load; the decompiler maps this to the number of registers it fills.
BitField<48, 3, UniformType> size;
// NOTE(review): presumably selects the cache behaviour of the load; not read by the
// decompiler code visible alongside this change - confirm.
BitField<46, 2, u64> cache_mode;
// Signed immediate that the decompiler adds to the address register value.
BitField<20, 24, s64> offset_immediate;
} ld_g;
union {
BitField<0, 3, u64> pred0;
BitField<3, 3, u64> pred3;

View File

@@ -0,0 +1,55 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/utils.h"
namespace OpenGL {
/// Creates a cached copy of a guest global memory region.
///
/// Reads `size` bytes of guest memory starting at `addr`, uploads them into a newly created GL
/// buffer object and passes the guest address to VideoCore::LabelGLObject for that buffer.
/// @param addr Guest address of the first byte of the region.
/// @param size Size of the region in bytes.
CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} {
    // Snapshot the guest memory backing this region.
    std::vector<u8> new_data(size);
    Memory::ReadBlock(addr, new_data.data(), new_data.size());

    buffer.Create();
    glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle);
    // The store is filled once by the application and then sourced by GL as a uniform block, so
    // the matching usage hint is STATIC_DRAW. STATIC_READ describes the opposite direction: data
    // written by GL and read back by the application.
    glBufferData(GL_UNIFORM_BUFFER, static_cast<GLsizeiptr>(new_data.size()), new_data.data(),
                 GL_STATIC_DRAW);
    VideoCore::LabelGLObject(GL_BUFFER, buffer.handle, addr);
}
/// Returns the cached buffer for the global memory region described by `global_region`, creating
/// and registering it on a cache miss.
///
/// The descriptor names a location inside one of the stage's constant buffers. From that location
/// a 64-bit GPU address is read, followed (8 bytes later) by a 32-bit size; together they describe
/// the actual region. The cache lookup is keyed on the region's translated address only.
/// @param global_region Constant buffer index/offset pair locating the address/size record.
/// @param stage Shader stage whose constant buffer bindings are consulted.
/// @return Shared handle to the cached region.
GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
    Tegra::Engines::Maxwell3D::GlobalMemoryDescriptor global_region,
    Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
    auto& gpu{Core::System::GetInstance().GPU()};

    // Bind by reference: only a single const buffer entry is read below, so copying the whole
    // per-stage state on every lookup would be wasted work.
    const auto& cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)];

    // Translate the GPU address of the address/size record to a CPU address.
    const auto cbuf_addr{gpu.MemoryManager().GpuToCpuAddress(
        cbufs.const_buffers[global_region.cbuf_index].address + global_region.cbuf_offset)};
    ASSERT(cbuf_addr != boost::none);

    // Record layout as read here: u64 GPU address, then u32 size at byte offset 8.
    const auto actual_addr_gpu = Memory::Read64(cbuf_addr.get());
    const auto size = Memory::Read32(cbuf_addr.get() + 8);
    const auto actual_addr{gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu)};
    ASSERT(actual_addr != boost::none);

    // Look up global region in the cache based on address
    GlobalRegion region{TryGet(*actual_addr)};

    if (!region) {
        // No global region found - create a new one
        region = std::make_shared<CachedGlobalRegion>(*actual_addr, size);
        Register(region);
    }

    return region;
}
} // namespace OpenGL

View File

@@ -0,0 +1,57 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <map>
#include <memory>
#include "common/common_types.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
namespace OpenGL {
// Forward declaration so the handle alias below can be declared before the class body.
class CachedGlobalRegion;
/// Shared handle to a cached global memory region; this is the object type stored by the cache.
using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
/// Shorthand for the Maxwell3D register structure.
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
/// A cached global memory region: remembers the guest address and size it was created from and
/// owns the GL buffer object that backs it.
class CachedGlobalRegion final : public RasterizerCacheObject {
public:
/// Constructs the region for `size` bytes of guest memory starting at `addr`.
CachedGlobalRegion(VAddr addr, u32 size);
/// Gets the address of the region in guest memory, required for cache management
VAddr GetAddr() const {
return addr;
}
/// Gets the size of the region in guest memory, required for cache management
std::size_t GetSizeInBytes() const {
return size;
}
/// Gets the GL handle of the buffer object backing this region
GLuint GetBufferHandle() const {
return buffer.handle;
}
// We do not have to flush this cache as things in it are never modified by us.
void Flush() override {}
private:
// Guest address given at construction.
VAddr addr;
// Region size given at construction.
u32 size;
// GL buffer object for the region; its handle is exposed through GetBufferHandle().
OGLBuffer buffer;
};
/// Rasterizer cache specialisation that stores CachedGlobalRegion objects.
class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
public:
/// Gets the cached global memory region referenced by the given descriptor, resolving the
/// descriptor against the constant buffers of the given shader stage.
GlobalRegion GetGlobalRegion(Tegra::Engines::Maxwell3D::GlobalMemoryDescriptor descriptor,
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
};
} // namespace OpenGL

View File

@@ -328,6 +328,27 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
// VertexB was combined with VertexA, so we skip the VertexB iteration
index++;
}
// Bind a uniform buffer for every global memory region recorded in the Maxwell3D state.
auto& maxwell3d{Core::System::GetInstance().GPU().Maxwell3D()};
auto& regions = maxwell3d.state.global_memory_uniforms;
// Running number of the region; it is part of the uniform block name looked up below.
// Regions are numbered in the set's iteration order.
size_t i = 0;
for (const auto& global_region : regions) {
// Fetch (or create) the cached GL buffer for this region.
const auto region = global_cache.GetGlobalRegion(
global_region, static_cast<Maxwell::ShaderStage>(stage));
const auto uniform_name = fmt::format("global_memory_region_declblock_{}", i);
// Query the program for a uniform block with that name.
const auto b_index = glGetProgramResourceIndex(shader->GetProgramHandle(primitive_mode),
GL_UNIFORM_BLOCK, uniform_name.c_str());
// Only bind when the program actually contains the block; a bind point is consumed per
// block that is found.
if (b_index != GL_INVALID_INDEX) {
glBindBufferBase(GL_UNIFORM_BUFFER, current_constbuffer_bindpoint,
region->GetBufferHandle());
glUniformBlockBinding(shader->GetProgramHandle(primitive_mode), b_index,
current_constbuffer_bindpoint);
++current_constbuffer_bindpoint;
}
++i;
}
}
state.Apply();
@@ -648,6 +669,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.InvalidateRegion(addr, size);
shader_cache.InvalidateRegion(addr, size);
global_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
}

View File

@@ -23,6 +23,7 @@
#include "video_core/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_primitive_assembler.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -183,6 +184,7 @@ private:
RasterizerCacheOpenGL res_cache;
ShaderCacheOpenGL shader_cache;
GlobalRegionCacheOpenGL global_cache;
Core::Frontend::EmuWindow& emu_window;

View File

@@ -12,6 +12,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "core/core.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_header.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -486,6 +487,7 @@ public:
GenerateInputAttrs();
GenerateOutputAttrs();
GenerateConstBuffers();
GenerateGlobalRegions();
GenerateSamplers();
GenerateGeometry();
}
@@ -605,6 +607,21 @@ private:
declarations.AddNewLine();
}
/// Emits one std140 uniform block declaration per entry currently present in the Maxwell3D
/// global_memory_uniforms set. Block N is named global_memory_region_declblock_N and wraps a
/// vec4 array named global_memory_region_N with 0x400 elements.
void GenerateGlobalRegions() {
    const auto& maxwell3d = Core::System::GetInstance().GPU().Maxwell3D();
    // The set is not touched while emitting, so its size can be read once up front.
    const std::size_t region_count = maxwell3d.state.global_memory_uniforms.size();

    for (std::size_t region_id = 0; region_id != region_count; ++region_id) {
        const std::string block_name =
            fmt::format("global_memory_region_declblock_{}", region_id);

        declarations.AddLine("layout(std140) uniform " + block_name);
        declarations.AddLine('{');
        declarations.AddLine(" vec4 global_memory_region_" + std::to_string(region_id) +
                             "[0x400];");
        declarations.AddLine("};");
        declarations.AddNewLine();
    }

    // Trailing blank line after the last block (also emitted when there are no regions).
    declarations.AddNewLine();
}
/// Generates declarations for samplers.
void GenerateSamplers() {
const auto& samplers = GetSamplers();
@@ -1520,6 +1537,11 @@ private:
} else {
op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
GLSLRegister::Type::Integer);
// Remember the two most recent IADD_C instructions (register plus constant buffer
// index/offset). The LDG handler uses them to guess which constant buffer entry an address
// was derived from.
if (opcode->GetId() == OpCode::Id::IADD_C) {
// Shift the previous record down before overwriting it with the current instruction.
s_last_iadd = last_iadd;
last_iadd = IADDReference{instr.gpr8.Value(), instr.cbuf34.index,
instr.cbuf34.offset};
}
}
}
@@ -2512,6 +2534,72 @@ private:
shader.AddLine('}');
break;
}
// LDG: the loaded values are read from a global_memory_region_N uniform array. Which region
// to use is guessed from the most recently seen IADD_C instructions.
case OpCode::Id::LDG: {
// Determine number of GPRs to fill with data
u64 count = 1;
switch (instr.ld_g.size) {
case Tegra::Shader::UniformType::Single:
count = 1;
break;
case Tegra::Shader::UniformType::Double:
count = 2;
break;
case Tegra::Shader::UniformType::Quad:
case Tegra::Shader::UniformType::UnsignedQuad:
count = 4;
break;
default:
UNREACHABLE_MSG("Unimplemented LDG size!");
}
// Start from the most recently recorded IADD_C.
auto [gpr_index, index, offset] = last_iadd;
// The last IADD might be the upper u32 of address, so instead take the one before
// that.
if (gpr_index == Register::ZeroIndex) {
gpr_index = s_last_iadd.out;
index = s_last_iadd.cbuf_index;
offset = s_last_iadd.cbuf_offset;
}
// NOTE(review): gpr is not referenced again in this case.
const auto gpr = regs.GetRegisterAsInteger(gpr_index);
// Expression for the constant buffer value that the IADD_C added; it is subtracted from
// the load address below to obtain an offset into the region.
const auto constbuffer =
regs.GetUniform(index, offset, GLSLRegister::Type::UnsignedInteger);
// Record the constant buffer slot in the Maxwell3D state. The offset is multiplied by 4
// here; the rasterizer side adds the stored value directly to the buffer address.
Core::System::GetInstance().GPU().Maxwell3D().state.global_memory_uniforms.insert(
{index, offset * 4});
// NOTE(review): the region number is taken as size() - 1 after the insert. The container is
// a std::set ordered by (cbuf_index, cbuf_offset) that ignores duplicate inserts, so this is
// not necessarily the position of the descriptor inserted above - confirm that it matches
// the iteration-order numbering used when the uniform blocks are bound.
const auto memory = fmt::format("global_memory_region_{}",
Core::System::GetInstance()
.GPU()
.Maxwell3D()
.state.global_memory_uniforms.size() -
1);
// Load address = immediate offset + value of the address register.
const auto immediate = std::to_string(instr.ld_g.offset_immediate.Value());
const auto o_register = regs.GetRegisterAsInteger(instr.gpr8, 0, false);
const auto address = "( " + immediate + " + " + o_register + " )";
const auto base_sub = address + " - " + constbuffer;
// New scope to prevent potential conflicts
shader.AddLine('{');
++shader.scope;
shader.AddLine("uint final_offset = " + base_sub + ";");
// Fill `count` consecutive registers starting at gpr0, one 4-byte element each.
for (size_t out = 0; out < count; ++out) {
const u64 reg_id = instr.gpr0.Value() + out;
// The region is a vec4 array: offset / 16 selects the vec4 and (offset / 4) % 4 selects
// the component within it.
const auto this_memory =
fmt::format("{}[(final_offset + {}) / 16][((final_offset + {}) / 4) % 4]",
memory, out * 4, out * 4);
regs.SetRegisterToFloat(reg_id, 0, this_memory, 1, 1);
}
--shader.scope;
shader.AddLine('}');
break;
}
default: {
LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->GetName());
UNREACHABLE();
@@ -3206,9 +3294,18 @@ private:
ShaderWriter declarations;
GLSLRegisterManager regs{shader, declarations, stage, suffix, header};
/// Snapshot of an IADD_C instruction: the register and the constant buffer operand it used.
struct IADDReference {
// Register recorded from the instruction's gpr8 field.
Register out;
// Constant buffer index of the instruction's cbuf34 operand.
u64 cbuf_index;
// Constant buffer offset of the instruction's cbuf34 operand.
u64 cbuf_offset;
};
// Most recently decoded IADD_C.
IADDReference last_iadd{};
// The IADD_C decoded before last_iadd ("second last").
IADDReference s_last_iadd{};
// Declarations
std::set<std::string> declr_predicates;
}; // namespace Decompiler
};
std::string GetCommonDeclarations() {
return fmt::format("#define MAX_CONSTBUFFER_ELEMENTS {}\n",