Compare commits

...

16 Commits

Author SHA1 Message Date
Subv
51d4c772a2 RasterizerCache: Swizzle to a staging buffer before copying it to memory when flushing a texture.
This gives us slightly increased memory safety, writing to out of bounds memory will be caught at the point of write instead of several calls down the line as memory corruption
2018-09-21 13:53:13 -05:00
Subv
2fd06acc8f GPU/DMA: Fixed Tiled->Linear transfers.
We no longer write to out of bounds memory anymore.
2018-09-21 13:53:12 -05:00
Subv
bdb3920753 GPU/DMA: Copy the requested amount of data when doing Linear->Linear 2D transfers. 2018-09-21 13:53:12 -05:00
Subv
bb9eeba670 GPU/DMA: Fixed the Linear->Tiled and Linear->Linear transfer modes.
This fixes the loading bar in Has-Been Heroes.

The Tiled->Tiled transfer mode is not implemented yet and will assert.
2018-09-21 13:53:12 -05:00
Subv
47826fd090 RasterizerGL: Flush dirty cached objects before removing them from the cache.
There might be a case where we draw to a framebuffer (thus making it dirty) and then proceed to overwrite only a portion of it from the CPU. The current code would cause the rest of the modified framebuffer to be discarded.
2018-09-21 13:53:11 -05:00
Subv
b87b8db879 GPU/Engines: Flush and invalidate source/dest regions in Fermi2D and KeplerMemory copies. 2018-09-21 13:53:11 -05:00
Subv
23e1dd3c4d RasterizerCache: Do not flush surfaces that have not been modified.
This should bring the performance closer to what it was before flushing was implemented.
2018-09-21 13:53:10 -05:00
Subv
acfc8d7fc9 RasterizerCache: Convert the S8Z24 format from Z24S8 back to S8Z24 when flushing. 2018-09-21 13:53:10 -05:00
Subv
81afcf3f4e RasterizerCache: Allow flushing compressed formats. 2018-09-21 13:53:10 -05:00
Subv
b27ba353d4 RasterizerCache: Log problematic formats when flushing a surface. 2018-09-21 13:53:09 -05:00
Subv
8474a5adf7 GPU/DMA: Implemented source offset copies for unswizzling Tiled->Linear transfers.
This is used by nouveau to implement glReadPixels.
2018-09-21 13:53:09 -05:00
Subv
ca7eb39b86 RasterizerCache: Remove the glGetError call after a surface flush 2018-09-21 13:53:08 -05:00
Subv
393f2418c5 GPU/DMA: Flush the source memory region before a DMA transfer and invalidate the destination region after the transfer. 2018-09-21 13:53:08 -05:00
Subv
d0a814eaca GPU/DMA: Pass the current rasterizer as a variable when constructing the DMA engine. 2018-09-21 13:53:06 -05:00
Subv
047bbe0881 RasterizerCache: Re-introduced the code to flush the various resource caches. 2018-09-21 13:53:06 -05:00
Subv
181aca9af0 RasterizerCache: Reintroduced code for flushing OpenGL surfaces back to memory.
This is required for a proper implementation of the DMA and 2D engines.
2018-09-21 13:53:05 -05:00
15 changed files with 327 additions and 51 deletions

View File

@@ -4,11 +4,13 @@
#include "core/memory.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Engines {
Fermi2D::Fermi2D(MemoryManager& memory_manager) : memory_manager(memory_manager) {}
Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
: memory_manager(memory_manager), rasterizer{rasterizer} {}
void Fermi2D::WriteReg(u32 method, u32 value) {
ASSERT_MSG(method < Regs::NUM_REGS,
@@ -52,6 +54,12 @@ void Fermi2D::HandleSurfaceCopy() {
return;
}
rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height);
// We have to invalidate the destination region to evict any outdated surfaces from the cache.
// We do this before actually writing the new data because the destination address might contain
// a dirty surface that will have to be written back to memory.
rasterizer.InvalidateRegion(dest_cpu, dst_bytes_per_pixel * regs.dst.width * regs.dst.height);
u8* src_buffer = Memory::GetPointer(source_cpu);
u8* dst_buffer = Memory::GetPointer(dest_cpu);

View File

@@ -12,6 +12,10 @@
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace VideoCore {
class RasterizerInterface;
}
namespace Tegra::Engines {
#define FERMI2D_REG_INDEX(field_name) \
@@ -19,7 +23,7 @@ namespace Tegra::Engines {
class Fermi2D final {
public:
explicit Fermi2D(MemoryManager& memory_manager);
explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
~Fermi2D() = default;
/// Write the value to the register identified by method.
@@ -94,6 +98,8 @@ public:
MemoryManager& memory_manager;
private:
VideoCore::RasterizerInterface& rasterizer;
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.
void HandleSurfaceCopy();

View File

@@ -5,10 +5,14 @@
#include "common/logging/log.h"
#include "core/memory.h"
#include "video_core/engines/kepler_memory.h"
#include "video_core/rasterizer_interface.h"
namespace Tegra::Engines {
KeplerMemory::KeplerMemory(MemoryManager& memory_manager) : memory_manager(memory_manager) {}
KeplerMemory::KeplerMemory(VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
: memory_manager(memory_manager), rasterizer{rasterizer} {}
KeplerMemory::~KeplerMemory() = default;
void KeplerMemory::WriteReg(u32 method, u32 value) {
@@ -37,6 +41,11 @@ void KeplerMemory::ProcessData(u32 data) {
VAddr dest_address =
*memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
// We have to invalidate the destination region to evict any outdated surfaces from the cache.
// We do this before actually writing the new data because the destination address might contain
// a dirty surface that will have to be written back to memory.
rasterizer.InvalidateRegion(dest_address, sizeof(u32));
Memory::Write32(dest_address, data);
state.write_offset++;

View File

@@ -11,6 +11,10 @@
#include "common/common_types.h"
#include "video_core/memory_manager.h"
namespace VideoCore {
class RasterizerInterface;
}
namespace Tegra::Engines {
#define KEPLERMEMORY_REG_INDEX(field_name) \
@@ -18,7 +22,7 @@ namespace Tegra::Engines {
class KeplerMemory final {
public:
KeplerMemory(MemoryManager& memory_manager);
KeplerMemory(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
~KeplerMemory();
/// Write the value to the register identified by method.
@@ -72,6 +76,7 @@ public:
private:
MemoryManager& memory_manager;
VideoCore::RasterizerInterface& rasterizer;
void ProcessData(u32 data);
};

View File

@@ -4,12 +4,14 @@
#include "core/memory.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/textures/decoders.h"
namespace Tegra {
namespace Engines {
MaxwellDMA::MaxwellDMA(MemoryManager& memory_manager) : memory_manager(memory_manager) {}
MaxwellDMA::MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
: memory_manager(memory_manager), rasterizer{rasterizer} {}
void MaxwellDMA::WriteReg(u32 method, u32 value) {
ASSERT_MSG(method < Regs::NUM_REGS,
@@ -44,36 +46,79 @@ void MaxwellDMA::HandleCopy() {
ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
ASSERT(regs.exec.query_intr == Regs::QueryIntr::None);
ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2);
ASSERT(regs.src_params.pos_x == 0);
ASSERT(regs.src_params.pos_y == 0);
ASSERT(regs.dst_params.pos_x == 0);
ASSERT(regs.dst_params.pos_y == 0);
if (regs.exec.is_dst_linear == regs.exec.is_src_linear) {
std::size_t copy_size = regs.x_count;
if (!regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
// If both the source and the destination are in block layout, assert.
UNREACHABLE_MSG("Tiled->Tiled DMA transfers are not yet implemented");
return;
}
if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
// When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
// buffer of length `x_count`, otherwise we copy a 2D buffer of size (x_count, y_count).
if (regs.exec.enable_2d) {
copy_size = copy_size * regs.y_count;
// buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
// y_count).
if (!regs.exec.enable_2d) {
Memory::CopyBlock(dest_cpu, source_cpu, regs.x_count);
return;
}
Memory::CopyBlock(dest_cpu, source_cpu, copy_size);
// If both the source and the destination are in linear layout, perform a line-by-line
// copy. We're going to take a subrect of size (x_count, y_count) from the source
// rectangle. There is no need to manually flush/invalidate the regions because
// CopyBlock does that for us.
for (u32 line = 0; line < regs.y_count; ++line) {
const VAddr source_line = source_cpu + line * regs.src_pitch;
const VAddr dest_line = dest_cpu + line * regs.dst_pitch;
Memory::CopyBlock(dest_line, source_line, regs.x_count);
}
return;
}
ASSERT(regs.exec.enable_2d == 1);
size_t copy_size = regs.x_count * regs.y_count;
const auto FlushAndInvalidate = [&](u32 src_size, u32 dst_size) {
// TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
// copying.
rasterizer.FlushRegion(source_cpu, src_size);
// We have to invalidate the destination region to evict any outdated surfaces from the
// cache. We do this before actually writing the new data because the destination address
// might contain a dirty surface that will have to be written back to memory.
rasterizer.InvalidateRegion(dest_cpu, dst_size);
};
u8* src_buffer = Memory::GetPointer(source_cpu);
u8* dst_buffer = Memory::GetPointer(dest_cpu);
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
ASSERT(regs.src_params.size_z == 1);
// If the input is tiled and the output is linear, deswizzle the input and copy it over.
Texture::CopySwizzledData(regs.src_params.size_x, regs.src_params.size_y, 1, 1, src_buffer,
dst_buffer, true, regs.src_params.BlockHeight());
u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y,
copy_size * src_bytes_per_pixel);
Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
regs.src_params.size_x, src_bytes_per_pixel, source_cpu, dest_cpu,
regs.src_params.BlockHeight(), regs.src_params.pos_x,
regs.src_params.pos_y);
} else {
ASSERT(regs.dst_params.size_z == 1);
ASSERT(regs.src_pitch == regs.x_count);
u32 src_bpp = regs.src_pitch / regs.x_count;
FlushAndInvalidate(regs.src_pitch * regs.y_count,
regs.dst_params.size_x * regs.dst_params.size_y * src_bpp);
// If the input is linear and the output is tiled, swizzle the input and copy it over.
Texture::CopySwizzledData(regs.dst_params.size_x, regs.dst_params.size_y, 1, 1, dst_buffer,
src_buffer, false, regs.dst_params.BlockHeight());
Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
src_bpp, dest_cpu, source_cpu, regs.dst_params.BlockHeight());
}
}

View File

@@ -12,11 +12,15 @@
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace VideoCore {
class RasterizerInterface;
}
namespace Tegra::Engines {
class MaxwellDMA final {
public:
explicit MaxwellDMA(MemoryManager& memory_manager);
explicit MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
~MaxwellDMA() = default;
/// Write the value to the register identified by method.
@@ -129,6 +133,8 @@ public:
MemoryManager& memory_manager;
private:
VideoCore::RasterizerInterface& rasterizer;
/// Performs the copy from the source buffer to the destination buffer as configured in the
/// registers.
void HandleCopy();

View File

@@ -25,10 +25,10 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
GPU::GPU(VideoCore::RasterizerInterface& rasterizer) {
memory_manager = std::make_unique<Tegra::MemoryManager>();
maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(*memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(*memory_manager);
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager);
}
GPU::~GPU() = default;

View File

@@ -17,6 +17,22 @@
template <class T>
class RasterizerCache : NonCopyable {
public:
/// Write any cached resources overlapping the region back to memory (if dirty)
void FlushRegion(Tegra::GPUVAddr addr, size_t size) {
if (size == 0)
return;
const ObjectInterval interval{addr, addr + size};
for (auto& pair : boost::make_iterator_range(object_cache.equal_range(interval))) {
for (auto& cached_object : pair.second) {
if (!cached_object)
continue;
cached_object->Flush();
}
}
}
/// Mark the specified region as being invalidated
void InvalidateRegion(VAddr addr, u64 size) {
if (size == 0)
@@ -71,6 +87,7 @@ protected:
void Unregister(const T& object) {
auto& rasterizer = Core::System::GetInstance().Renderer().Rasterizer();
rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
object->Flush();
object_cache.subtract({GetInterval(object), ObjectSet{object}});
}

View File

@@ -23,6 +23,9 @@ struct CachedBufferEntry final {
return size;
}
// We do not have to flush this cache as things in it are never modified by us.
void Flush() {}
VAddr addr;
std::size_t size;
GLintptr offset;

View File

@@ -322,6 +322,13 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
// Used when just a single color attachment is enabled, e.g. for clearing a color buffer
Surface color_surface =
res_cache.GetColorBufferSurface(*single_color_target, preserve_contents);
if (color_surface) {
// Assume that a surface will be written to if it is used as a framebuffer, even if
// the shader doesn't actually write to it.
color_surface->MarkAsDirty();
}
glFramebufferTexture2D(
GL_DRAW_FRAMEBUFFER,
GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target), GL_TEXTURE_2D,
@@ -332,6 +339,11 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
std::array<GLenum, Maxwell::NumRenderTargets> buffers;
for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents);
if (color_surface) {
// Assume that a surface will be written to if it is used as a framebuffer, even
// if the shader doesn't actually write to it.
color_surface->MarkAsDirty();
}
buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
glFramebufferTexture2D(
GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
@@ -351,6 +363,10 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
}
if (depth_surface) {
// Assume that a surface will be written to if it is used as a framebuffer, even if
// the shader doesn't actually write to it.
depth_surface->MarkAsDirty();
if (regs.stencil_enable) {
// Attach both depth and stencil
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
@@ -538,9 +554,19 @@ void RasterizerOpenGL::DrawArrays() {
state.Apply();
}
void RasterizerOpenGL::FlushAll() {}
void RasterizerOpenGL::FlushAll() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.FlushRegion(0, Kernel::VMManager::MAX_ADDRESS);
shader_cache.FlushRegion(0, Kernel::VMManager::MAX_ADDRESS);
buffer_cache.FlushRegion(0, Kernel::VMManager::MAX_ADDRESS);
}
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {}
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.FlushRegion(addr, size);
shader_cache.FlushRegion(addr, size);
buffer_cache.FlushRegion(addr, size);
}
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
@@ -550,6 +576,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
}
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
FlushRegion(addr, size);
InvalidateRegion(addr, size);
}

View File

@@ -265,20 +265,22 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, std::si
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
// With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
// pixel values.
const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
if (morton_to_gl) {
// With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
// pixel values.
const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
const std::vector<u8> data = Tegra::Texture::UnswizzleTexture(
addr, tile_size, bytes_per_pixel, stride, height, block_height);
const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())};
memcpy(gl_buffer, data.data(), size_to_copy);
} else {
// TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should
// check the configuration for this and perform more generic un/swizzle
LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel,
Memory::GetPointer(addr), gl_buffer, morton_to_gl);
std::vector<u8> data(height * stride * bytes_per_pixel);
Tegra::Texture::CopySwizzledData(stride / tile_size, height / tile_size, bytes_per_pixel,
bytes_per_pixel, data.data(), gl_buffer, false,
block_height);
const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())};
memcpy(Memory::GetPointer(addr), data.data(), size_to_copy);
}
}
@@ -357,17 +359,16 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr),
MortonCopy<false, PixelFormat::RGBA16UI>,
MortonCopy<false, PixelFormat::R11FG11FB10F>,
MortonCopy<false, PixelFormat::RGBA32UI>,
// TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/BC6H_UF16/BC6H_SF16/ASTC_2D_4X4
// formats are not supported
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
MortonCopy<false, PixelFormat::DXT1>,
MortonCopy<false, PixelFormat::DXT23>,
MortonCopy<false, PixelFormat::DXT45>,
MortonCopy<false, PixelFormat::DXN1>,
MortonCopy<false, PixelFormat::DXN2UNORM>,
MortonCopy<false, PixelFormat::DXN2SNORM>,
MortonCopy<false, PixelFormat::BC7U>,
MortonCopy<false, PixelFormat::BC6H_UF16>,
MortonCopy<false, PixelFormat::BC6H_SF16>,
// TODO(Subv): Swizzling ASTC formats are not supported
nullptr,
MortonCopy<false, PixelFormat::G8R8U>,
MortonCopy<false, PixelFormat::G8R8S>,
@@ -503,7 +504,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
}
static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) {
union S8Z24 {
BitField<0, 24, u32> z24;
BitField<24, 8, u32> s8;
@@ -516,16 +517,23 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
};
static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
S8Z24 input_pixel{};
Z24S8 output_pixel{};
S8Z24 s8z24_pixel{};
Z24S8 z24s8_pixel{};
constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)};
for (std::size_t y = 0; y < height; ++y) {
for (std::size_t x = 0; x < width; ++x) {
const std::size_t offset{bpp * (y * width + x)};
std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24));
output_pixel.s8.Assign(input_pixel.s8);
output_pixel.z24.Assign(input_pixel.z24);
std::memcpy(&data[offset], &output_pixel, sizeof(Z24S8));
if (reverse) {
std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
s8z24_pixel.s8.Assign(z24s8_pixel.s8);
s8z24_pixel.z24.Assign(z24s8_pixel.z24);
std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
} else {
std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
z24s8_pixel.s8.Assign(s8z24_pixel.s8);
z24s8_pixel.z24.Assign(s8z24_pixel.z24);
std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
}
}
}
}
@@ -561,7 +569,7 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma
}
case PixelFormat::S8Z24:
// Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
ConvertS8Z24ToZ24S8(data, width, height);
ConvertS8Z24ToZ24S8(data, width, height, false);
break;
case PixelFormat::G8R8U:
@@ -572,6 +580,30 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma
}
}
/**
* Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to
* Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or
* with typical desktop GPUs.
*/
static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
u32 width, u32 height) {
switch (pixel_format) {
case PixelFormat::G8R8U:
case PixelFormat::G8R8S:
case PixelFormat::ASTC_2D_4X4:
case PixelFormat::ASTC_2D_8X8: {
LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
static_cast<u32>(pixel_format));
UNREACHABLE();
break;
}
case PixelFormat::S8Z24:
// Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24.
ConvertS8Z24ToZ24S8(data, width, height, true);
break;
}
}
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192));
void CachedSurface::LoadGLBuffer() {
ASSERT(params.type != SurfaceType::Fill);
@@ -609,11 +641,70 @@ void CachedSurface::LoadGLBuffer() {
}
ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height);
dirty = false;
}
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
void CachedSurface::FlushGLBuffer() {
ASSERT_MSG(false, "Unimplemented");
MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
const auto& rect{params.GetRect()};
// Load data from memory to the surface
const GLint x0 = static_cast<GLint>(rect.left);
const GLint y0 = static_cast<GLint>(rect.bottom);
const size_t buffer_offset =
static_cast<size_t>(static_cast<size_t>(y0) * params.width + static_cast<size_t>(x0)) *
GetGLBytesPerPixel(params.pixel_format);
const u32 bytes_per_pixel = GetGLBytesPerPixel(params.pixel_format);
const u32 copy_size = params.width * params.height * bytes_per_pixel;
gl_buffer.resize(static_cast<size_t>(params.depth) * copy_size);
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT
ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0);
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
ASSERT(!tuple.compressed);
ASSERT(x0 == 0 && y0 == 0);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, gl_buffer.size(),
gl_buffer.data());
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer, params.pixel_format, params.width,
params.height);
ASSERT(params.type != SurfaceType::Fill);
const u8* const texture_src_data = Memory::GetPointer(params.addr);
ASSERT(texture_src_data);
if (params.is_tiled) {
// TODO(bunnei): This only swizzles and copies a 2D texture - we do not yet know how to do
// this for 3D textures, etc.
switch (params.target) {
case SurfaceParams::SurfaceTarget::Texture2D:
// Pass impl. to the fallback code below
break;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented tiled unload for target={}",
static_cast<u32>(params.target));
UNREACHABLE();
}
gl_to_morton_fns[static_cast<size_t>(params.pixel_format)](
params.width, params.block_height, params.height, &gl_buffer[buffer_offset], copy_size,
params.addr + buffer_offset);
} else {
Memory::WriteBlock(params.addr + buffer_offset, &gl_buffer[buffer_offset],
gl_buffer.size() - buffer_offset);
}
}
MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192));

View File

@@ -741,6 +741,19 @@ public:
return params.size_in_bytes;
}
void Flush() {
// There is no need to flush the surface if it hasn't been modified by us.
if (!dirty)
return;
FlushGLBuffer();
dirty = false;
}
void MarkAsDirty() {
dirty = true;
}
const OGLTexture& Texture() const {
return texture;
}
@@ -772,6 +785,7 @@ private:
std::vector<u8> gl_buffer;
SurfaceParams params;
GLenum gl_target;
bool dirty = false;
};
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {

View File

@@ -32,6 +32,9 @@ public:
return GLShader::MAX_PROGRAM_CODE_LENGTH * sizeof(u64);
}
// We do not have to flush this cache as things in it are never modified by us.
void Flush() {}
/// Gets the shader entries for the shader
const GLShader::ShaderEntries& GetShaderEntries() const {
return entries;

View File

@@ -88,6 +88,38 @@ void FastSwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u8* swizzled_da
}
}
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
u32 block_height) {
for (u32 line = 0; line < subrect_height; ++line) {
for (u32 x = 0; x < subrect_width; ++x) {
u32 swizzled_offset =
GetSwizzleOffset(x, line, swizzled_width, bytes_per_pixel, block_height);
const VAddr source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
const VAddr dest_addr = swizzled_data + swizzled_offset;
Memory::CopyBlock(dest_addr, source_line, bytes_per_pixel);
}
}
}
void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
u32 block_height, u32 offset_x, u32 offset_y) {
for (u32 line = 0; line < subrect_height; ++line) {
for (u32 x = 0; x < subrect_width; ++x) {
u32 swizzled_offset = GetSwizzleOffset(offset_x, line + offset_y, swizzled_width,
bytes_per_pixel, block_height);
const VAddr dest_line = unswizzled_data + line * dest_pitch + x;
const VAddr source_addr = swizzled_data + swizzled_offset;
Memory::CopyBlock(dest_line, source_addr, bytes_per_pixel);
}
}
}
u32 BytesPerPixel(TextureFormat format) {
switch (format) {
case TextureFormat::DXT1:

View File

@@ -26,6 +26,16 @@ std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 wid
void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel,
u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height);
/// Copies an untiled subrectangle into a tiled surface.
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
u32 block_height);
/// Copies a tiled subrectangle into a linear surface.
void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
u32 block_height, u32 offset_x, u32 offset_y);
/**
* Decodes an unswizzled texture into a A8R8G8B8 texture.
*/