Compare commits

...

3 Commits

Author SHA1 Message Date
Fernando Sahmkow
e9deeb5a4c MaxwellDMA: Correct DMA for copies with offset. 2019-07-24 01:13:08 -04:00
Fernando Sahmkow
474d81f1c9 TextureCache: Implement Accelerate DMA 2019-07-24 00:14:05 -04:00
Fernando Sahmkow
9cb2e3603f MaxwellDMA: Setup options for Accelerate DMA. 2019-07-23 16:18:46 -04:00
11 changed files with 380 additions and 48 deletions

View File

@@ -37,6 +37,109 @@ void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
#undef MAXWELLDMA_REG_INDEX
}
void MaxwellDMA::TiledLinearCopy(const std::size_t src_size, const std::size_t dst_size,
const std::size_t bytes_per_pixel) {
const GPUVAddr source = regs.src_address.Address();
const GPUVAddr dest = regs.dst_address.Address();
const std::size_t src_layer_size = Texture::CalculateSize(
true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, 1,
regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
if (read_buffer.size() < src_size) {
read_buffer.resize(src_size);
}
if (write_buffer.size() < dst_size) {
write_buffer.resize(dst_size);
}
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
Texture::UnswizzleSubrect(
regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel,
read_buffer.data() + src_layer_size * regs.src_params.pos_z, write_buffer.data(),
regs.src_params.BlockHeight(), regs.src_params.pos_x, regs.src_params.pos_y);
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
}
void MaxwellDMA::LinearTiledCopy(const std::size_t src_size, const std::size_t dst_size,
const std::size_t bytes_per_pixel) {
const GPUVAddr source = regs.src_address.Address();
const GPUVAddr dest = regs.dst_address.Address();
const std::size_t dst_layer_size = Texture::CalculateSize(
true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
if (read_buffer.size() < src_size) {
read_buffer.resize(src_size);
}
if (write_buffer.size() < dst_size) {
write_buffer.resize(dst_size);
}
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
// If the input is linear and the output is tiled, swizzle the input and copy it over.
Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
regs.dst_params.pos_x, regs.dst_params.pos_y, bytes_per_pixel,
write_buffer.data() + dst_layer_size * regs.dst_params.pos_z,
read_buffer.data(), regs.dst_params.BlockHeight());
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
}
void MaxwellDMA::TextureAccelerateDMA(const std::size_t src_size, const std::size_t dst_size,
const bool src_hit, const bool dst_hit,
const std::size_t bytes_per_pixel) {
// Configure Source
SurfaceConfig src_config;
src_config.in_cache = src_hit;
src_config.gpu_addr = regs.src_address.Address();
src_config.size = src_size;
src_config.is_linear = regs.exec.is_src_linear != 0;
src_config.bytes_per_pixel = bytes_per_pixel;
if (src_config.is_linear) {
src_config.pitch = regs.src_pitch;
src_config.width = regs.x_count + regs.src_params.pos_x;
src_config.height = regs.y_count + regs.src_params.pos_y;
} else {
src_config.tiled = regs.src_params;
}
// Configure Destination
SurfaceConfig dst_config;
dst_config.in_cache = dst_hit;
dst_config.gpu_addr = regs.dst_address.Address();
dst_config.size = dst_size;
dst_config.is_linear = regs.exec.is_dst_linear != 0;
dst_config.bytes_per_pixel = bytes_per_pixel;
if (dst_config.is_linear) {
dst_config.pitch = regs.dst_pitch;
dst_config.width = regs.x_count + regs.dst_params.pos_x;
dst_config.height = regs.y_count + regs.dst_params.pos_y;
} else {
dst_config.tiled = regs.dst_params;
}
CopyConfig copy_config;
copy_config.src_pos_x = regs.src_params.pos_x;
copy_config.src_pos_y = regs.src_params.pos_y;
copy_config.src_pos_z = regs.src_params.pos_z;
copy_config.dst_pos_x = regs.dst_params.pos_x;
copy_config.dst_pos_y = regs.dst_params.pos_y;
copy_config.dst_pos_z = regs.dst_params.pos_z;
copy_config.width = regs.x_count;
copy_config.height = regs.y_count;
rasterizer.AccelerateDMATexture(src_config, dst_config, copy_config);
}
void MaxwellDMA::HandleCopy() {
LOG_WARNING(HW_GPU, "Requested a DMA copy");
@@ -86,63 +189,44 @@ void MaxwellDMA::HandleCopy() {
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
ASSERT(regs.src_params.size_z == 1);
// If the input is tiled and the output is linear, deswizzle the input and copy it over.
const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count;
const std::size_t src_size = Texture::CalculateSize(
true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
const std::size_t dst_size = regs.dst_pitch * regs.y_count;
if (read_buffer.size() < src_size) {
read_buffer.resize(src_size);
const u32 src_flags = rasterizer.IsCacheHit(source, src_size);
const u32 dst_flags = rasterizer.IsCacheHit(dest, dst_size);
const bool src_hit = src_flags == VideoCore::Caches::TextureCache;
const bool dst_hit = dst_flags == VideoCore::Caches::TextureCache;
if (src_hit || dst_hit) {
TextureAccelerateDMA(src_size, dst_size, src_hit, dst_hit, bytes_per_pixel);
} else {
TiledLinearCopy(src_size, dst_size, bytes_per_pixel);
}
if (write_buffer.size() < dst_size) {
write_buffer.resize(dst_size);
}
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(),
write_buffer.data(), regs.src_params.BlockHeight(),
regs.src_params.pos_x, regs.src_params.pos_y);
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
} else {
ASSERT(regs.dst_params.BlockDepth() == 0);
const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;
const u32 bytes_per_pixel = regs.src_pitch / regs.x_count;
const std::size_t dst_size = Texture::CalculateSize(
true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
const std::size_t dst_layer_size = Texture::CalculateSize(
true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
const std::size_t src_size = regs.src_pitch * regs.y_count;
if (read_buffer.size() < src_size) {
read_buffer.resize(src_size);
const u32 src_flags = rasterizer.IsCacheHit(source, src_size);
const u32 dst_flags = rasterizer.IsCacheHit(dest, dst_size);
const bool src_hit = src_flags == VideoCore::Caches::TextureCache;
const bool dst_hit = dst_flags == VideoCore::Caches::TextureCache;
if (src_hit || dst_hit) {
TextureAccelerateDMA(src_size, dst_size, src_hit, dst_hit, bytes_per_pixel);
} else {
LinearTiledCopy(src_size, dst_size, bytes_per_pixel);
}
if (write_buffer.size() < dst_size) {
write_buffer.resize(dst_size);
}
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
// If the input is linear and the output is tiled, swizzle the input and copy it over.
Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
src_bytes_per_pixel,
write_buffer.data() + dst_layer_size * regs.dst_params.pos_z,
read_buffer.data(), regs.dst_params.BlockHeight());
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
}
}

View File

@@ -58,6 +58,10 @@ public:
BitField<16, 16, u32> pos_y;
};
/// Returns the current value of the block_width field.
u32 BlockWidth() const {
return block_width.Value();
}
/// Returns the current value of the block_height field.
/// NOTE(review): callers pass this to the swizzle helpers as a shift amount
/// (block_height_bit) - confirm the field is stored as log2 of the block height.
u32 BlockHeight() const {
return block_height.Value();
}
@@ -177,6 +181,33 @@ public:
};
} regs{};
/// Describes one endpoint (source or destination) of a DMA copy that is handed to the
/// rasterizer for acceleration.
struct SurfaceConfig {
// True when the texture cache reported a hit for this endpoint's region.
bool in_cache;
// Size of one pixel in bytes.
u32 bytes_per_pixel;
// GPU virtual address of the surface.
GPUVAddr gpu_addr;
// Size of the region in bytes.
std::size_t size;
// Selects which member of the union below is meaningful.
bool is_linear;
union {
// Valid when is_linear is true.
struct {
u32 pitch;
u32 width;
u32 height;
};
// Valid when is_linear is false: the raw tiled surface parameters from the registers.
Regs::Parameters tiled;
};
};
/// Rectangle to copy: origin inside the source, origin inside the destination, and extent.
struct CopyConfig {
// Source origin, taken from the source parameter registers.
u32 src_pos_x;
u32 src_pos_y;
u32 src_pos_z;
// Destination origin, taken from the destination parameter registers.
u32 dst_pos_x;
u32 dst_pos_y;
u32 dst_pos_z;
// Extent of the copy, taken from the x_count / y_count registers.
u32 width;
u32 height;
};
private:
Core::System& system;
@@ -187,6 +218,11 @@ private:
std::vector<u8> read_buffer;
std::vector<u8> write_buffer;
/// Software copy from a tiled source to a linear destination through the staging buffers.
void TiledLinearCopy(std::size_t src_size, std::size_t dst_size, std::size_t bytes_per_pixel);
/// Software copy from a linear source to a tiled destination through the staging buffers.
void LinearTiledCopy(std::size_t src_size, std::size_t dst_size, std::size_t bytes_per_pixel);
/// Builds surface and copy descriptions from the registers and forwards them to the
/// rasterizer's AccelerateDMATexture.
void TextureAccelerateDMA(std::size_t src_size, std::size_t dst_size, bool src_hit,
bool dst_hit, std::size_t bytes_per_pixel);
/// Performs the copy from the source buffer to the destination buffer as configured in the
/// registers.
void HandleCopy();

View File

@@ -8,6 +8,7 @@
#include <functional>
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/gpu.h"
namespace Tegra {
@@ -22,6 +23,13 @@ enum class LoadCallbackStage {
Build,
Complete,
};
/// Bit flags identifying the GPU caches; each enumerator occupies its own bit so that
/// several can be combined in a single u32 result.
enum Caches : u32 {
TextureCache = 1,
BufferCache = 2,
ShaderCache = 4,
};
using DiskResourceLoadCallback = std::function<void(LoadCallbackStage, std::size_t, std::size_t)>;
class RasterizerInterface {
@@ -47,6 +55,11 @@ public:
/// and invalidated
virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
/// Checks whether the given GPU memory address and size fall within any of the GPU caches.
/// The result is a flag variable based on VideoCore::Caches, with the corresponding
/// bit turned on for each cache that was hit.
virtual u32 IsCacheHit(GPUVAddr gpu_addr, std::size_t size) = 0;
/// Notify rasterizer that a frame is about to finish
virtual void TickFrame() = 0;
@@ -57,6 +70,10 @@ public:
return false;
}
/// Performs a MaxwellDMA copy described by the given source, destination and copy
/// configurations. The default implementation does nothing.
virtual void AccelerateDMATexture(const Tegra::Engines::MaxwellDMA::SurfaceConfig& src_config,
const Tegra::Engines::MaxwellDMA::SurfaceConfig& dst_config,
const Tegra::Engines::MaxwellDMA::CopyConfig& copy_config) {}
/// Attempt to use a faster method to display the framebuffer to screen
virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) {

View File

@@ -811,6 +811,14 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
InvalidateRegion(addr, size);
}
/// Reports which caches contain the given GPU region as a VideoCore::Caches bit mask.
/// Only the texture cache is consulted here, so the result is either TextureCache or 0.
u32 RasterizerOpenGL::IsCacheHit(const GPUVAddr gpu_addr, const std::size_t size) {
    const bool in_texture_cache = texture_cache.IsHit(gpu_addr, size);
    return in_texture_cache ? static_cast<u32>(VideoCore::Caches::TextureCache) : 0U;
}
void RasterizerOpenGL::TickFrame() {
buffer_cache.TickFrame();
}
@@ -823,6 +831,13 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs
return true;
}
/// Forwards a MaxwellDMA copy request unchanged to the texture cache's AccelerateDMA.
void RasterizerOpenGL::AccelerateDMATexture(
const Tegra::Engines::MaxwellDMA::SurfaceConfig& src_config,
const Tegra::Engines::MaxwellDMA::SurfaceConfig& dst_config,
const Tegra::Engines::MaxwellDMA::CopyConfig& copy_config) {
texture_cache.AccelerateDMA(src_config, dst_config, copy_config);
}
bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {
if (!framebuffer_addr) {

View File

@@ -62,10 +62,14 @@ public:
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
u32 IsCacheHit(GPUVAddr gpu_addr, std::size_t size) override;
void TickFrame() override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
void AccelerateDMATexture(const Tegra::Engines::MaxwellDMA::SurfaceConfig& src_config,
const Tegra::Engines::MaxwellDMA::SurfaceConfig& dst_config,
const Tegra::Engines::MaxwellDMA::CopyConfig& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
bool AccelerateDrawBatch(bool is_indexed) override;

View File

@@ -9,6 +9,7 @@
namespace VideoCommon {
struct CopyParams {
constexpr CopyParams() = default;
constexpr CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y,
u32 dest_z, u32 source_level, u32 dest_level, u32 width, u32 height,
u32 depth)

View File

@@ -202,6 +202,39 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
return params;
}
/// Builds surface parameters for one endpoint of a MaxwellDMA copy.
/// @param config Endpoint description; its `tiled` member is read only when it is not linear,
///               and its pitch/width/height members only when it is linear.
/// @param component_type Component type to assign to the surface.
/// @param pixel_format Pixel format to assign to the surface.
/// @param target Surface target to assign to the surface.
/// @return Parameters for a single-level surface.
SurfaceParams SurfaceParams::CreateForDMASurface(
    const Tegra::Engines::MaxwellDMA::SurfaceConfig& config,
    const VideoCore::Surface::ComponentType component_type,
    const VideoCore::Surface::PixelFormat pixel_format,
    const VideoCore::Surface::SurfaceTarget target) {
    SurfaceParams params{};
    params.is_tiled = !config.is_linear;
    params.srgb_conversion = false;
    // Block dimensions are clamped to 5 for tiled surfaces and forced to 0 for linear ones.
    // Each assignment is its own statement; the original chained them with the comma operator.
    params.block_width = params.is_tiled ? std::min(config.tiled.BlockWidth(), 5U) : 0;
    params.block_height = params.is_tiled ? std::min(config.tiled.BlockHeight(), 5U) : 0;
    params.block_depth = params.is_tiled ? std::min(config.tiled.BlockDepth(), 5U) : 0;
    params.tile_width_spacing = 1;
    params.pixel_format = pixel_format;
    params.component_type = component_type;
    params.type = GetFormatType(pixel_format);
    params.target = target;
    if (params.is_tiled) {
        params.depth = config.tiled.size_z;
        params.width = config.tiled.size_x;
        params.height = config.tiled.size_y;
        // Tiled configs carry no pitch, so derive one from the width.
        params.pitch = params.width * config.bytes_per_pixel;
    } else {
        params.depth = 1;
        params.width = config.width;
        params.height = config.height;
        params.pitch = config.pitch;
    }
    params.num_levels = 1;
    params.emulated_levels = 1;
    params.is_layered = params.IsLayered();
    return params;
}
bool SurfaceParams::IsLayered() const {
switch (target) {
case SurfaceTarget::Texture1DArray:

View File

@@ -12,6 +12,7 @@
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/surface.h"
#include "video_core/textures/decoders.h"
@@ -40,6 +41,12 @@ public:
static SurfaceParams CreateForFermiCopySurface(
const Tegra::Engines::Fermi2D::Regs::Surface& config);
/// Creates SurfaceCachedParams from a MaxwellDMA surface configuration.
static SurfaceParams CreateForDMASurface(
const Tegra::Engines::MaxwellDMA::SurfaceConfig& config,
VideoCore::Surface::ComponentType component_type,
VideoCore::Surface::PixelFormat pixel_format, VideoCore::Surface::SurfaceTarget target);
std::size_t Hash() const {
return static_cast<std::size_t>(
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));

View File

@@ -215,6 +215,50 @@ public:
dst_surface.first->MarkAsModified(true, Tick());
}
/**
 * Performs a MaxwellDMA copy between two surfaces obtained from the texture cache.
 * Format information is taken from whichever endpoint is already cached; when only one
 * endpoint is cached, the other inherits its pixel format and component type.
 * @param src_config Description of the source endpoint.
 * @param dst_config Description of the destination endpoint.
 * @param copy_config Origin in both surfaces and extent of the rectangle to copy.
 */
void AccelerateDMA(const Tegra::Engines::MaxwellDMA::SurfaceConfig& src_config,
                   const Tegra::Engines::MaxwellDMA::SurfaceConfig& dst_config,
                   const Tegra::Engines::MaxwellDMA::CopyConfig& copy_config) {
    DMAInfo src_info;
    DMAInfo dst_info;
    // Both endpoints must be checked here. Testing the source twice sent the
    // "source cached, destination not" case into this branch, which then explored an
    // uncached destination.
    if (src_config.in_cache && dst_config.in_cache) {
        src_info = ExploreDMA(src_config);
        dst_info = ExploreDMA(dst_config);
    } else if (src_config.in_cache) {
        src_info = ExploreDMA(src_config);
        dst_info.pixel_format = src_info.pixel_format;
        dst_info.component_type = src_info.component_type;
        dst_info.target = FigureDMATarget(dst_config);
    } else {
        // Only the destination is cached (the caller invokes this when at least one hits).
        dst_info = ExploreDMA(dst_config);
        src_info.pixel_format = dst_info.pixel_format;
        src_info.component_type = dst_info.component_type;
        src_info.target = FigureDMATarget(src_config);
    }

    TSurface src_surface = GetDMASurface(src_config, src_info).first;
    TSurface dst_surface = GetDMASurface(dst_config, dst_info).first;
    const auto& src_params = src_surface->GetSurfaceParams();
    const auto& dst_params = dst_surface->GetSurfaceParams();
    // Surfaces of different types go through a buffer copy instead of an image copy.
    if (src_params.type != dst_params.type) {
        BufferCopy(src_surface, dst_surface);
        return;
    }

    CopyParams copy_params{};
    copy_params.source_x = copy_config.src_pos_x;
    copy_params.source_y = copy_config.src_pos_y;
    copy_params.source_z = copy_config.src_pos_z;
    copy_params.dest_x = copy_config.dst_pos_x;
    copy_params.dest_y = copy_config.dst_pos_y;
    copy_params.dest_z = copy_config.dst_pos_z;
    copy_params.source_level = 0;
    copy_params.dest_level = 0;
    copy_params.width = copy_config.width;
    copy_params.height = copy_config.height;
    copy_params.depth = 1;
    ImageCopy(src_surface, dst_surface, copy_params);
}
TSurface TryFindFramebufferSurface(const u8* host_ptr) {
const CacheAddr cache_addr = ToCacheAddr(host_ptr);
if (!cache_addr) {
@@ -234,6 +278,26 @@ public:
return ++ticks;
}
/// Tells whether the given GPU region is known to the texture cache: either a surface is
/// registered at exactly that address, or at least one surface lies in the region.
/// Takes the cache mutex for the duration of the lookup.
/// @param gpu_addr GPU virtual address where the region starts.
/// @param size Size of the region in bytes.
/// @return true on a hit; false when the address does not resolve or nothing is found.
bool IsHit(const GPUVAddr gpu_addr, const std::size_t size) {
    std::lock_guard lock{mutex};

    const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
    const auto cache_addr{ToCacheAddr(host_ptr)};
    if (!cache_addr) {
        return false;
    }

    // Exact-address lookup first, then fall back to a region scan.
    const bool exact_match = l1_cache.find(cache_addr) != l1_cache.end();
    if (exact_match) {
        return true;
    }
    return !GetSurfacesInRegion(cache_addr, size).empty();
}
protected:
TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
: system{system}, rasterizer{rasterizer} {
@@ -344,6 +408,12 @@ private:
BufferCopy = 3,
};
/// Format and target information used to build the surface parameters of a DMA endpoint.
struct DMAInfo {
SurfaceTarget target;
PixelFormat pixel_format;
VideoCore::Surface::ComponentType component_type;
};
/**
* `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle.
* @param overlaps, the overlapping surfaces registered in the cache.
@@ -773,6 +843,67 @@ private:
return siblings_table[static_cast<std::size_t>(format)];
}
/**
 * Derives target and format information for a DMA endpoint from surfaces already cached.
 * A surface registered at exactly the endpoint's address with the same tiling mode and
 * dimensions is preferred and supplies its target; otherwise the first surface found in the
 * region supplies the formats and the target is Texture2DArray.
 * @param config Description of the endpoint to look up.
 * @return The derived information, or a value-initialized DMAInfo when the region holds no
 *         surface at all.
 */
DMAInfo ExploreDMA(const Tegra::Engines::MaxwellDMA::SurfaceConfig& config) {
    const auto host_ptr{system.GPU().MemoryManager().GetPointer(config.gpu_addr)};
    const auto cache_addr{ToCacheAddr(host_ptr)};
    DMAInfo dma_info{};

    const auto it = l1_cache.find(cache_addr);
    if (it != l1_cache.end()) {
        const auto& params = it->second->GetSurfaceParams();
        if (params.is_tiled == !config.is_linear) {
            // Tiled surfaces are matched on width/height/depth (size_z is a depth, not a
            // pitch); linear surfaces on width/height/pitch.
            const bool same_shape =
                params.is_tiled
                    ? (config.tiled.size_x == params.width &&
                       config.tiled.size_y == params.height &&
                       config.tiled.size_z == params.depth)
                    : (config.width == params.width && config.height == params.height &&
                       config.pitch == params.pitch);
            if (same_shape) {
                dma_info.target = params.target;
                dma_info.component_type = params.component_type;
                dma_info.pixel_format = params.pixel_format;
                // Return now so the overlap fallback below cannot overwrite the exact match.
                return dma_info;
            }
        }
    }

    const auto overlaps{GetSurfacesInRegion(cache_addr, config.size)};
    if (overlaps.empty()) {
        // Nothing to borrow a format from; indexing an empty container would be undefined.
        return dma_info;
    }
    const auto& params = overlaps[0]->GetSurfaceParams();
    dma_info.target = SurfaceTarget::Texture2DArray;
    dma_info.component_type = params.component_type;
    dma_info.pixel_format = params.pixel_format;
    return dma_info;
}
/// Obtains the cached surface/view pair for a DMA endpoint.
/// Builds surface parameters from the endpoint description and the supplied format info,
/// then looks the surface up through GetSurface at the endpoint's GPU address.
/// @param config Description of the DMA endpoint.
/// @param info Target, pixel format and component type to use for the surface.
std::pair<TSurface, TView> GetDMASurface(
    const Tegra::Engines::MaxwellDMA::SurfaceConfig& config, const DMAInfo& info) {
    SurfaceParams surface_params = SurfaceParams::CreateForDMASurface(
        config, info.component_type, info.pixel_format, info.target);
    return GetSurface(config.gpu_addr, surface_params, true, false);
}
/// Chooses a surface target for a DMA endpoint that has no cached surface to copy it from.
/// Linear endpoints and tiled endpoints with size_z of at most 1 are Texture2D; deeper tiled
/// endpoints are Texture3D when their block depth is non-zero and Texture2DArray otherwise.
/// @param config Description of the DMA endpoint.
SurfaceTarget FigureDMATarget(const Tegra::Engines::MaxwellDMA::SurfaceConfig& config) {
    // The tiled parameters are only consulted for non-linear endpoints.
    if (config.is_linear || config.tiled.size_z <= 1) {
        return SurfaceTarget::Texture2D;
    }
    const u32 depth_blocks = config.tiled.BlockDepth();
    return depth_blocks > 0 ? SurfaceTarget::Texture3D : SurfaceTarget::Texture2DArray;
}
struct FramebufferTargetInfo {
TSurface target;
TView view;

View File

@@ -256,20 +256,23 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y,
}
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
u32 block_height_bit) {
u32 dst_x, u32 dst_y, u32 bytes_per_pixel, u8* swizzled_data,
u8* unswizzled_data, u32 block_height_bit) {
const u32 block_height = 1U << block_height_bit;
const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
gob_size_x};
for (u32 line = 0; line < subrect_height; ++line) {
const u32 dst_line = line + dst_y;
const u32 gob_address_y =
(line / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs +
((line % (gob_size_y * block_height)) / gob_size_y) * gob_size;
(dst_line / (gob_size_y * block_height)) * gob_size * block_height *
image_width_in_gobs +
((dst_line % (gob_size_y * block_height)) / gob_size_y) * gob_size;
const auto& table = legacy_swizzle_table[line % gob_size_y];
for (u32 x = 0; x < subrect_width; ++x) {
const u32 x2 = x + dst_x;
const u32 gob_address =
gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x];
gob_address_y + (x2 * bytes_per_pixel / gob_size_x) * gob_size * block_height;
const u32 swizzled_offset = gob_address + table[(x2 * bytes_per_pixel) % gob_size_x];
u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
u8* dest_addr = swizzled_data + swizzled_offset;

View File

@@ -44,7 +44,8 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
/// Copies an untiled subrectangle into a tiled surface.
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height);
u32 dst_x, u32 dst_y, u32 bytes_per_pixel, u8* swizzled_data,
u8* unswizzled_data, u32 block_height);
/// Copies a tiled subrectangle into a linear surface.
void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,