Compare commits

...

17 Commits

Author SHA1 Message Date
bunnei
c43eaa94f3 gl_shader_decompiler: Implement SEL instruction. 2018-07-22 00:37:12 -04:00
bunnei
4cd5df95d6 Merge pull request #761 from bunnei/improve-raster-cache
Improvements to rasterizer cache
2018-07-21 20:28:53 -07:00
bunnei
63fbf9a7d3 gl_rasterizer_cache: Blit surfaces on recreation instead of flush and load. 2018-07-21 21:51:06 -04:00
bunnei
4301f0b539 gl_rasterizer_cache: Use GPUVAddr as cache key, not parameter set. 2018-07-21 21:51:06 -04:00
bunnei
cd47391c2d gl_rasterizer_cache: Use zeta_width and zeta_height registers for depth buffer. 2018-07-21 21:51:06 -04:00
bunnei
d8c60029d6 gl_rasterizer: Use zeta_enable register to enable depth buffer. 2018-07-21 21:51:06 -04:00
bunnei
5287991a36 maxwell_3d: Add depth buffer enable, width, and height registers. 2018-07-21 21:51:05 -04:00
bunnei
53a219f163 Merge pull request #759 from lioncash/redundant
file_util: Remove redundant duplicate return in GetPathWithoutTop()
2018-07-21 18:50:38 -07:00
bunnei
3ac736c003 Merge pull request #748 from lioncash/namespace
video_core: Use nested namespaces where applicable
2018-07-21 18:50:14 -07:00
bunnei
f5e87f4ce1 Merge pull request #758 from lioncash/sync
common: Remove synchronized_wrapper.h
2018-07-21 18:30:31 -07:00
bunnei
9533875eeb Merge pull request #760 from lioncash/path
file_util: Use an enum class for GetUserPath()
2018-07-21 18:30:04 -07:00
bunnei
d95a1a3742 Merge pull request #762 from Subv/ioctl2
GPU: Implement the NVGPU_IOCTL_CHANNEL_KICKOFF_PB ioctl2 command.
2018-07-21 18:28:55 -07:00
Subv
5c49e56d41 GPU: Implement the NVGPU_IOCTL_CHANNEL_KICKOFF_PB ioctl2 command.
This behaves quite similarly to the SubmitGPFIFO command. Referenced from Ryujinx.
Many thanks to @gdkchan for investigating this!
2018-07-21 15:50:02 -05:00
Lioncash
34d6a1349c file_util: Remove explicit type from std::min() in GetPathWithoutTop()
Given both operands are the same type, there won't be an issue with
overload selection that requires making this explicit.
2018-07-21 15:19:32 -04:00
Lioncash
41660c8923 file_util: Remove redundant duplicate return in GetPathWithoutTop() 2018-07-21 15:18:23 -04:00
Lioncash
973fdce79b common: Remove synchronized_wrapper.h
This is entirely unused in the codebase.
2018-07-21 14:51:44 -04:00
Lioncash
bb960c8cb4 video_core: Use nested namespaces where applicable
Compresses a few namespace specifiers to be more compact.
2018-07-20 18:23:54 -04:00
20 changed files with 236 additions and 214 deletions

View File

@@ -63,7 +63,6 @@ add_library(common STATIC
string_util.cpp
string_util.h
swap.h
synchronized_wrapper.h
telemetry.cpp
telemetry.h
thread.cpp

View File

@@ -838,8 +838,7 @@ std::string GetPathWithoutTop(std::string path) {
}
const auto name_bck_index = path.find_first_of('\\');
const auto name_fwd_index = path.find_first_of('/');
return path.substr(std::min<size_t>(name_bck_index, name_fwd_index) + 1);
return path.substr(std::min<size_t>(name_bck_index, name_fwd_index) + 1);
return path.substr(std::min(name_bck_index, name_fwd_index) + 1);
}
std::string GetFilename(std::string path) {

View File

@@ -1,85 +0,0 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <mutex>
namespace Common {

template <typename T>
class SynchronizedWrapper;

/**
 * Synchronized reference that keeps a SynchronizedWrapper's mutex locked during its lifetime.
 * This greatly reduces the chance that someone will access the wrapped resource without locking
 * the mutex.
 *
 * The type is move-only: moving transfers responsibility for unlocking the mutex to the
 * destination, and a moved-from reference no longer unlocks anything when destroyed.
 */
template <typename T>
class SynchronizedRef {
public:
    /// Locks the wrapper's mutex (blocking until it is acquired) and keeps it locked.
    /// Deliberately not `explicit`: SynchronizedWrapper::Lock() returns `{*this}`.
    SynchronizedRef(SynchronizedWrapper<T>& wrapper) : wrapper(&wrapper) {
        wrapper.mutex.lock();
    }

    SynchronizedRef(const SynchronizedRef&) = delete;

    /// Takes over the lock held by `o`; `o` is left empty and will not unlock on destruction.
    SynchronizedRef(SynchronizedRef&& o) noexcept : wrapper(o.wrapper) {
        o.wrapper = nullptr;
    }

    /// Unlocks the mutex, unless this reference has been moved from.
    ~SynchronizedRef() {
        if (wrapper)
            wrapper->mutex.unlock();
    }

    SynchronizedRef& operator=(const SynchronizedRef&) = delete;

    /// Exchanges the held locks with `o`. The lock previously held by this reference is
    /// released when `o` is destroyed.
    SynchronizedRef& operator=(SynchronizedRef&& o) noexcept {
        std::swap(wrapper, o.wrapper);
        return *this;
    }

    /// Accessors for the wrapped object. Must not be used on a moved-from reference.
    T& operator*() {
        return wrapper->data;
    }
    const T& operator*() const {
        return wrapper->data;
    }

    T* operator->() {
        return &wrapper->data;
    }
    const T* operator->() const {
        return &wrapper->data;
    }

private:
    /// Wrapper whose mutex is currently held, or nullptr after being moved from.
    SynchronizedWrapper<T>* wrapper;
};

/**
 * Wraps an object, only allowing access to it via a locking reference wrapper. Good to ensure no
 * one forgets to lock a mutex before accessing an object. To access the wrapped object construct
 * a SynchronizedRef on this wrapper. Inspired by Rust's Mutex type
 * (http://doc.rust-lang.org/std/sync/struct.Mutex.html).
 */
template <typename T>
class SynchronizedWrapper {
public:
    /// Constructs the wrapped object in place, forwarding all arguments to T's constructor.
    template <typename... Args>
    SynchronizedWrapper(Args&&... args) : data(std::forward<Args>(args)...) {}

    /// Locks the mutex and returns a reference object that unlocks it when destroyed.
    SynchronizedRef<T> Lock() {
        return {*this};
    }

private:
    // SynchronizedRef needs direct access to the mutex and the data.
    template <typename U>
    friend class SynchronizedRef;

    std::mutex mutex;
    T data;
};

} // namespace Common

View File

@@ -42,6 +42,9 @@ u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u
if (command.cmd == NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO) {
return SubmitGPFIFO(input, output);
}
if (command.cmd == NVGPU_IOCTL_CHANNEL_KICKOFF_PB) {
return KickoffPB(input, output);
}
}
UNIMPLEMENTED_MSG("Unimplemented ioctl");
@@ -127,14 +130,37 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
IoctlSubmitGpfifo params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
params.gpfifo, params.num_entries, params.flags);
params.address, params.num_entries, params.flags);
auto entries = std::vector<IoctlGpfifoEntry>();
entries.resize(params.num_entries);
std::memcpy(&entries[0], &input.data()[sizeof(IoctlSubmitGpfifo)],
params.num_entries * sizeof(IoctlGpfifoEntry));
for (auto entry : entries) {
VAddr va_addr = entry.Address();
Tegra::GPUVAddr va_addr = entry.Address();
Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz);
}
params.fence_out.id = 0;
params.fence_out.value = 0;
std::memcpy(output.data(), &params, output.size());
return 0;
}
u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output) {
if (input.size() < sizeof(IoctlSubmitGpfifo)) {
UNIMPLEMENTED();
}
IoctlSubmitGpfifo params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
params.address, params.num_entries, params.flags);
std::vector<IoctlGpfifoEntry> entries(params.num_entries);
Memory::ReadBlock(params.address, entries.data(),
params.num_entries * sizeof(IoctlGpfifoEntry));
for (auto entry : entries) {
Tegra::GPUVAddr va_addr = entry.Address();
Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz);
}
params.fence_out.id = 0;

View File

@@ -15,6 +15,7 @@ namespace Service::Nvidia::Devices {
class nvmap;
constexpr u32 NVGPU_IOCTL_MAGIC('H');
constexpr u32 NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO(0x8);
constexpr u32 NVGPU_IOCTL_CHANNEL_KICKOFF_PB(0x1b);
class nvhost_gpu final : public nvdevice {
public:
@@ -158,14 +159,14 @@ private:
BitField<31, 1, u32_le> unk2;
};
VAddr Address() const {
return (static_cast<VAddr>(gpu_va_hi) << 32) | entry0;
Tegra::GPUVAddr Address() const {
return (static_cast<Tegra::GPUVAddr>(gpu_va_hi) << 32) | entry0;
}
};
static_assert(sizeof(IoctlGpfifoEntry) == 8, "IoctlGpfifoEntry is incorrect size");
struct IoctlSubmitGpfifo {
u64_le gpfifo; // (ignored) pointer to gpfifo fence structs
u64_le address; // pointer to gpfifo entry structs
u32_le num_entries; // number of fence objects being submitted
u32_le flags;
IoctlFence fence_out; // returned new fence object for others to wait on
@@ -193,6 +194,7 @@ private:
u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output);
u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output);
u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output);
u32 KickoffPB(const std::vector<u8>& input, std::vector<u8>& output);
u32 GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output);

View File

@@ -101,7 +101,7 @@ NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name)
{8, &NVDRV::SetClientPID, "SetClientPID"},
{9, nullptr, "DumpGraphicsMemoryInfo"},
{10, nullptr, "InitializeDevtools"},
{11, nullptr, "Ioctl2"},
{11, &NVDRV::Ioctl, "Ioctl2"},
{12, nullptr, "Ioctl3"},
{13, &NVDRV::FinishInitialize, "FinishInitialize"},
};

View File

@@ -6,8 +6,7 @@
#include "video_core/engines/fermi_2d.h"
#include "video_core/textures/decoders.h"
namespace Tegra {
namespace Engines {
namespace Tegra::Engines {
Fermi2D::Fermi2D(MemoryManager& memory_manager) : memory_manager(memory_manager) {}
@@ -69,5 +68,4 @@ void Fermi2D::HandleSurfaceCopy() {
}
}
} // namespace Engines
} // namespace Tegra
} // namespace Tegra::Engines

View File

@@ -12,8 +12,7 @@
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace Tegra {
namespace Engines {
namespace Tegra::Engines {
#define FERMI2D_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))
@@ -110,5 +109,4 @@ ASSERT_REG_POSITION(operation, 0xAB);
ASSERT_REG_POSITION(trigger, 0xB5);
#undef ASSERT_REG_POSITION
} // namespace Engines
} // namespace Tegra
} // namespace Tegra::Engines

View File

@@ -17,8 +17,7 @@
#include "video_core/memory_manager.h"
#include "video_core/textures/texture.h"
namespace Tegra {
namespace Engines {
namespace Tegra::Engines {
#define MAXWELL3D_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
@@ -488,7 +487,12 @@ public:
};
} rt_control;
INSERT_PADDING_WORDS(0x2B);
INSERT_PADDING_WORDS(0x2);
u32 zeta_width;
u32 zeta_height;
INSERT_PADDING_WORDS(0x27);
u32 depth_test_enable;
@@ -541,7 +545,11 @@ public:
u32 vb_element_base;
INSERT_PADDING_WORDS(0x49);
INSERT_PADDING_WORDS(0x40);
u32 zeta_enable;
INSERT_PADDING_WORDS(0x8);
struct {
u32 tsc_address_high;
@@ -866,6 +874,8 @@ ASSERT_REG_POSITION(clear_depth, 0x364);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458);
ASSERT_REG_POSITION(rt_control, 0x487);
ASSERT_REG_POSITION(zeta_width, 0x48a);
ASSERT_REG_POSITION(zeta_height, 0x48b);
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
@@ -875,6 +885,7 @@ ASSERT_REG_POSITION(blend, 0x4CF);
ASSERT_REG_POSITION(stencil, 0x4E0);
ASSERT_REG_POSITION(screen_y_control, 0x4EB);
ASSERT_REG_POSITION(vb_element_base, 0x50D);
ASSERT_REG_POSITION(zeta_enable, 0x54E);
ASSERT_REG_POSITION(tsc, 0x557);
ASSERT_REG_POSITION(tic, 0x55D);
ASSERT_REG_POSITION(stencil_two_side, 0x565);
@@ -898,5 +909,4 @@ ASSERT_REG_POSITION(tex_info_buffers.size[0], 0xD2F);
#undef ASSERT_REG_POSITION
} // namespace Engines
} // namespace Tegra
} // namespace Tegra::Engines

View File

@@ -6,8 +6,7 @@
#include "common/common_types.h"
namespace Tegra {
namespace Engines {
namespace Tegra::Engines {
class MaxwellCompute final {
public:
@@ -18,5 +17,4 @@ public:
void WriteReg(u32 method, u32 value);
};
} // namespace Engines
} // namespace Tegra
} // namespace Tegra::Engines

View File

@@ -12,8 +12,7 @@
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace Tegra {
namespace Engines {
namespace Tegra::Engines {
class MaxwellDMA final {
public:
@@ -151,5 +150,4 @@ ASSERT_REG_POSITION(src_params, 0x1CA);
#undef ASSERT_REG_POSITION
} // namespace Engines
} // namespace Tegra
} // namespace Tegra::Engines

View File

@@ -15,8 +15,7 @@
#include "common/bit_field.h"
#include "common/common_types.h"
namespace Tegra {
namespace Shader {
namespace Tegra::Shader {
struct Register {
/// Number of registers
@@ -109,8 +108,7 @@ union Sampler {
u64 value{};
};
} // namespace Shader
} // namespace Tegra
} // namespace Tegra::Shader
namespace std {
@@ -127,8 +125,7 @@ struct make_unsigned<Tegra::Shader::Register> {
} // namespace std
namespace Tegra {
namespace Shader {
namespace Tegra::Shader {
enum class Pred : u64 {
UnusedIndex = 0x7,
@@ -291,6 +288,11 @@ union Instruction {
BitField<49, 1, u64> negate_a;
} alu_integer;
union {
BitField<39, 3, u64> pred;
BitField<42, 1, u64> neg_pred;
} sel;
union {
BitField<39, 3, u64> pred;
BitField<42, 1, u64> negate_pred;
@@ -516,6 +518,9 @@ public:
ISCADD_C, // Scale and Add
ISCADD_R,
ISCADD_IMM,
SEL_C,
SEL_R,
SEL_IMM,
MUFU, // Multi-Function Operator
RRO_C, // Range Reduction Operator
RRO_R,
@@ -716,6 +721,9 @@ private:
INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"),
INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"),
INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"),
INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"),
INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"),
INST("0011100010100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
@@ -784,5 +792,4 @@ private:
}
};
} // namespace Shader
} // namespace Tegra
} // namespace Tegra::Shader

View File

@@ -387,7 +387,7 @@ void RasterizerOpenGL::Clear() {
}
if (regs.clear_buffers.Z) {
clear_mask |= GL_DEPTH_BUFFER_BIT;
use_depth_fb = true;
use_depth_fb = regs.zeta_enable != 0;
// Always enable the depth write when clearing the depth buffer. The depth write mask is
// ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true.
@@ -413,11 +413,13 @@ void RasterizerOpenGL::Clear() {
glClear(clear_mask);
// Mark framebuffer surfaces as dirty
if (dirty_color_surface != nullptr) {
res_cache.MarkSurfaceAsDirty(dirty_color_surface);
}
if (dirty_depth_surface != nullptr) {
res_cache.MarkSurfaceAsDirty(dirty_depth_surface);
if (Settings::values.use_accurate_framebuffers) {
if (dirty_color_surface != nullptr) {
res_cache.FlushSurface(dirty_color_surface);
}
if (dirty_depth_surface != nullptr) {
res_cache.FlushSurface(dirty_depth_surface);
}
}
}
@@ -431,7 +433,7 @@ void RasterizerOpenGL::DrawArrays() {
ScopeAcquireGLContext acquire_context;
auto [dirty_color_surface, dirty_depth_surface] =
ConfigureFramebuffers(true, regs.zeta.Address() != 0);
ConfigureFramebuffers(true, regs.zeta.Address() != 0 && regs.zeta_enable != 0);
SyncDepthTestState();
SyncBlendState();
@@ -520,11 +522,13 @@ void RasterizerOpenGL::DrawArrays() {
state.Apply();
// Mark framebuffer surfaces as dirty
if (dirty_color_surface != nullptr) {
res_cache.MarkSurfaceAsDirty(dirty_color_surface);
}
if (dirty_depth_surface != nullptr) {
res_cache.MarkSurfaceAsDirty(dirty_depth_surface);
if (Settings::values.use_accurate_framebuffers) {
if (dirty_color_surface != nullptr) {
res_cache.FlushSurface(dirty_color_surface);
}
if (dirty_depth_surface != nullptr) {
res_cache.FlushSurface(dirty_depth_surface);
}
}
}

View File

@@ -65,9 +65,9 @@ struct FormatTuple {
return params;
}
/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config, Tegra::GPUVAddr zeta_address,
Tegra::DepthFormat format) {
/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
Tegra::GPUVAddr zeta_address,
Tegra::DepthFormat format) {
SurfaceParams params{};
params.addr = zeta_address;
@@ -77,9 +77,9 @@ struct FormatTuple {
params.component_type = ComponentTypeFromDepthFormat(format);
params.type = GetFormatType(params.pixel_format);
params.size_in_bytes = params.SizeInBytes();
params.width = config.width;
params.height = config.height;
params.unaligned_height = config.height;
params.width = zeta_width;
params.height = zeta_height;
params.unaligned_height = zeta_height;
params.size_in_bytes = params.SizeInBytes();
return params;
}
@@ -254,6 +254,60 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup
cur_state.Apply();
}
/// Blits src_rect of src_tex onto dst_rect of dst_tex.
/// Both textures are attached to the supplied read/draw framebuffers according to the surface
/// type, then glBlitFramebuffer copies the matching buffer(s). Color blits are filtered with
/// GL_LINEAR, everything else with GL_NEAREST. The OpenGL state that was current on entry is
/// re-applied on exit. For a surface type other than ColorTexture, Depth or DepthStencil no
/// attachment is changed and the blit is issued with an empty mask. Always returns true.
static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex,
                         const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type,
                         GLuint read_fb_handle, GLuint draw_fb_handle) {
    // Remember the caller's state so it can be restored once the blit is done
    OpenGLState saved_state{OpenGLState::GetCurState()};
    SCOPE_EXIT({ saved_state.Apply(); });

    // Bind the scratch framebuffers used for the copy
    OpenGLState blit_state;
    blit_state.draw.read_framebuffer = read_fb_handle;
    blit_state.draw.draw_framebuffer = draw_fb_handle;
    blit_state.Apply();

    u32 blit_mask{};

    switch (type) {
    case SurfaceType::ColorTexture:
        // Color only: attach the textures as color and clear any depth/stencil attachment
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex,
                               0);
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
                               0);

        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex,
                               0);
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
                               0);

        blit_mask = GL_COLOR_BUFFER_BIT;
        break;
    case SurfaceType::Depth:
        // Depth only: clear the color and stencil attachments
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);

        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);

        blit_mask = GL_DEPTH_BUFFER_BIT;
        break;
    case SurfaceType::DepthStencil:
        // Combined depth/stencil: clear the color attachment
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
                               src_tex, 0);

        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
                               dst_tex, 0);

        blit_mask = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
        break;
    default:
        // Any other surface type: leave the attachments alone, blit with an empty mask
        break;
    }

    // Linear filtering is only used for pure color blits
    const GLenum filter = blit_mask == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST;
    glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left,
                      dst_rect.bottom, dst_rect.right, dst_rect.top, blit_mask, filter);

    return true;
}
CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) {
texture.Create();
const auto& rect{params.GetRect()};
@@ -519,8 +573,8 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
}
if (using_depth_fb) {
depth_params =
SurfaceParams::CreateForDepthBuffer(regs.rt[0], regs.zeta.Address(), regs.zeta.format);
depth_params = SurfaceParams::CreateForDepthBuffer(regs.zeta_width, regs.zeta_height,
regs.zeta.Address(), regs.zeta.format);
}
MathUtil::Rectangle<u32> color_rect{};
@@ -565,17 +619,9 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
}
void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) {
if (Settings::values.use_accurate_framebuffers) {
// If enabled, always flush dirty surfaces
surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
surface->FlushGLBuffer();
} else {
// Otherwise, don't mark surfaces that we write to as cached, because the resulting loads
// and flushes are very slow and do not seem to improve accuracy
const auto& params{surface->GetSurfaceParams()};
Memory::RasterizerMarkRegionCached(params.addr, params.size_in_bytes, false);
}
/// Flushes the surface to Switch memory: downloads its GL texture using the cache's read/draw
/// framebuffer handles, then flushes the surface's GL buffer.
void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) {
surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
surface->FlushGLBuffer();
}
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
@@ -588,25 +634,53 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
if (gpu.memory_manager->GpuToCpuAddress(params.addr) == boost::none)
return {};
// Check for an exact match in existing surfaces
const auto& surface_key{SurfaceKey::Create(params)};
const auto& search{surface_cache.find(surface_key)};
// Look up surface in the cache based on address
const auto& search{surface_cache.find(params.addr)};
Surface surface;
if (search != surface_cache.end()) {
surface = search->second;
if (Settings::values.use_accurate_framebuffers) {
// Reload the surface from Switch memory
LoadSurface(surface);
// If use_accurate_framebuffers is enabled, always load from memory
FlushSurface(surface);
UnregisterSurface(surface);
} else if (surface->GetSurfaceParams() != params) {
// If surface parameters changed, recreate the surface from the old one
return RecreateSurface(surface, params);
} else {
// Use the cached surface as-is
return surface;
}
} else {
surface = std::make_shared<CachedSurface>(params);
RegisterSurface(surface);
LoadSurface(surface);
}
// No surface found - create a new one
surface = std::make_shared<CachedSurface>(params);
RegisterSurface(surface);
LoadSurface(surface);
return surface;
}
/// Replaces a cached surface with a new one built from new_params.
/// The old surface's texture is blitted into the new surface, the old surface is unregistered
/// from the cache and the new one registered in its place. Returns the new surface.
Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
                                               const SurfaceParams& new_params) {
    const auto& old_params{surface->GetSurfaceParams()};

    // The blit requires the old and new surfaces to agree on type and format
    ASSERT(old_params.type == new_params.type);
    ASSERT(old_params.pixel_format == new_params.pixel_format);
    ASSERT(old_params.component_type == new_params.component_type);

    // Build the replacement and carry the previous contents over to it
    Surface recreated{std::make_shared<CachedSurface>(new_params)};
    const auto src_handle = surface->Texture().handle;
    const auto dst_handle = recreated->Texture().handle;
    BlitTextures(src_handle, old_params.GetRect(), dst_handle,
                 recreated->GetSurfaceParams().GetRect(), old_params.type,
                 read_framebuffer.handle, draw_framebuffer.handle);

    // Swap the cache entry over to the replacement
    UnregisterSurface(surface);
    RegisterSurface(recreated);

    return recreated;
}
Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const {
// Tries to find the GPU address of a framebuffer based on the CPU address. This is because
// final output framebuffers are specified by CPU address, but internally our GPU cache uses
@@ -652,22 +726,20 @@ void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size)
void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
const auto& params{surface->GetSurfaceParams()};
const auto& surface_key{SurfaceKey::Create(params)};
const auto& search{surface_cache.find(surface_key)};
const auto& search{surface_cache.find(params.addr)};
if (search != surface_cache.end()) {
// Registered already
return;
}
surface_cache[surface_key] = surface;
surface_cache[params.addr] = surface;
UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1);
}
void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
const auto& params{surface->GetSurfaceParams()};
const auto& surface_key{SurfaceKey::Create(params)};
const auto& search{surface_cache.find(surface_key)};
const auto& search{surface_cache.find(params.addr)};
if (search == surface_cache.end()) {
// Unregistered already

View File

@@ -10,7 +10,6 @@
#include <vector>
#include <boost/icl/interval_map.hpp>
#include "common/common_types.h"
#include "common/hash.h"
#include "common/math_util.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -137,6 +136,7 @@ struct SurfaceParams {
ASSERT(static_cast<size_t>(format) < bpp_table.size());
return bpp_table[static_cast<size_t>(format)];
}
/// Convenience overload: forwards this surface's own pixel_format to GetFormatBpp(format).
u32 GetFormatBpp() const {
return GetFormatBpp(pixel_format);
}
@@ -365,9 +365,21 @@ struct SurfaceParams {
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
/// Creates SurfaceParams for a depth buffer configuration
static SurfaceParams CreateForDepthBuffer(
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config,
Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format);
static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
Tegra::GPUVAddr zeta_address,
Tegra::DepthFormat format);
/// Two parameter sets are equal when every listed field matches, compared member by member.
bool operator==(const SurfaceParams& other) const {
    return addr == other.addr && is_tiled == other.is_tiled &&
           block_height == other.block_height && pixel_format == other.pixel_format &&
           component_type == other.component_type && type == other.type &&
           width == other.width && height == other.height &&
           unaligned_height == other.unaligned_height && size_in_bytes == other.size_in_bytes;
}
/// Negation of operator==.
bool operator!=(const SurfaceParams& other) const {
    return !(*this == other);
}
Tegra::GPUVAddr addr;
bool is_tiled;
@@ -381,24 +393,6 @@ struct SurfaceParams {
size_t size_in_bytes;
};
/// Hashable variation of SurfaceParams, used for a key in the surface cache
struct SurfaceKey : Common::HashableStruct<SurfaceParams> {
    /// Builds a key whose hashed state is a copy of the given surface parameters.
    static SurfaceKey Create(const SurfaceParams& params) {
        SurfaceKey key;
        key.state = params;
        return key;
    }
};
namespace std {
/// std::hash specialization that forwards to SurfaceKey::Hash(), allowing SurfaceKey to be used
/// as the key type of unordered containers.
template <>
struct hash<SurfaceKey> {
    size_t operator()(const SurfaceKey& key) const {
        return key.Hash();
    }
};
} // namespace std
class CachedSurface final {
public:
CachedSurface(const SurfaceParams& params);
@@ -444,8 +438,8 @@ public:
SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
const MathUtil::Rectangle<s32>& viewport);
/// Marks the specified surface as "dirty", in that it is out of sync with Switch memory
void MarkSurfaceAsDirty(const Surface& surface);
/// Flushes the surface to Switch memory
void FlushSurface(const Surface& surface);
/// Tries to find a framebuffer GPU address based on the provided CPU address
Surface TryFindFramebufferSurface(VAddr cpu_addr) const;
@@ -460,6 +454,9 @@ private:
void LoadSurface(const Surface& surface);
Surface GetSurface(const SurfaceParams& params);
/// Recreates a surface with new parameters
Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params);
/// Register surface into the cache
void RegisterSurface(const Surface& surface);
@@ -469,7 +466,7 @@ private:
/// Increase/decrease the number of surfaces in pages touching the specified region
void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta);
std::unordered_map<SurfaceKey, Surface> surface_cache;
std::unordered_map<Tegra::GPUVAddr, Surface> surface_cache;
PageMap cached_pages;
OGLFramebuffer read_framebuffer;

View File

@@ -12,8 +12,7 @@
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
namespace GLShader {
namespace Decompiler {
namespace GLShader::Decompiler {
using Tegra::Shader::Attribute;
using Tegra::Shader::Instruction;
@@ -1140,6 +1139,15 @@ private:
"((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
break;
}
case OpCode::Id::SEL_C:
case OpCode::Id::SEL_R:
case OpCode::Id::SEL_IMM: {
std::string condition =
GetPredicateCondition(instr.sel.pred, instr.sel.neg_pred != 0);
regs.SetRegisterToInteger(instr.gpr0, true, 0,
'(' + condition + ") ? " + op_a + " : " + op_b, 1, 1);
break;
}
case OpCode::Id::LOP_C:
case OpCode::Id::LOP_R:
case OpCode::Id::LOP_IMM: {
@@ -1845,5 +1853,4 @@ boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code,
return boost::none;
}
} // namespace Decompiler
} // namespace GLShader
} // namespace GLShader::Decompiler

View File

@@ -12,8 +12,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
namespace GLShader {
namespace Decompiler {
namespace GLShader::Decompiler {
using Tegra::Engines::Maxwell3D;
@@ -23,5 +22,4 @@ boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code,
Maxwell3D::Regs::ShaderStage stage,
const std::string& suffix);
} // namespace Decompiler
} // namespace GLShader
} // namespace GLShader::Decompiler

View File

@@ -10,8 +10,7 @@
#include "video_core/textures/decoders.h"
#include "video_core/textures/texture.h"
namespace Tegra {
namespace Texture {
namespace Tegra::Texture {
/**
* Calculates the offset of an (x, y) position within a swizzled texture.
@@ -186,5 +185,4 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
return rgba_data;
}
} // namespace Texture
} // namespace Tegra
} // namespace Tegra::Texture

View File

@@ -8,8 +8,7 @@
#include "common/common_types.h"
#include "video_core/textures/texture.h"
namespace Tegra {
namespace Texture {
namespace Tegra::Texture {
/**
* Unswizzles a swizzled texture without changing its format.
@@ -33,5 +32,4 @@ void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_
std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
u32 height);
} // namespace Texture
} // namespace Tegra
} // namespace Tegra::Texture

View File

@@ -10,8 +10,7 @@
#include "common/common_types.h"
#include "video_core/memory_manager.h"
namespace Tegra {
namespace Texture {
namespace Tegra::Texture {
enum class TextureFormat : u32 {
R32_G32_B32_A32 = 0x01,
@@ -260,5 +259,4 @@ struct FullTextureInfo {
/// Returns the number of bytes per pixel of the input texture format.
u32 BytesPerPixel(TextureFormat format);
} // namespace Texture
} // namespace Tegra
} // namespace Tegra::Texture