MaxwellDMA: Correct DMA for copies with offset.

TextureCache: Implement Accelerate DMA
MaxwellDMA: Setup options for Accelerate DMA.
2019-07-24 01:13:08 -04:00 · 2019-07-24 00:14:05 -04:00 · 2019-07-23 16:18:46 -04:00
11 changed files with 380 additions and 48 deletions
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -37,6 +37,109 @@ void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
 #undef MAXWELLDMA_REG_INDEX
 }

+/// Software path for a tiled-to-linear DMA copy: unswizzles the requested sub-rectangle of the
+/// source into the pitch-linear destination by way of the read/write staging buffers.
+void MaxwellDMA::TiledLinearCopy(const std::size_t src_size, const std::size_t dst_size,
+                                 const std::size_t bytes_per_pixel) {
+    const auto& tiled = regs.src_params;
+    const GPUVAddr src_addr = regs.src_address.Address();
+    const GPUVAddr dst_addr = regs.dst_address.Address();
+
+    // Byte size of a single depth slice of the source; used to step to slice pos_z.
+    const std::size_t slice_bytes =
+        Texture::CalculateSize(true, bytes_per_pixel, tiled.size_x, tiled.size_y, 1,
+                               tiled.BlockHeight(), tiled.BlockDepth());
+
+    // Grow the staging buffers on demand.
+    if (src_size > read_buffer.size()) {
+        read_buffer.resize(src_size);
+    }
+    if (dst_size > write_buffer.size()) {
+        write_buffer.resize(dst_size);
+    }
+
+    // The destination is fetched too because the whole region is written back below.
+    memory_manager.ReadBlock(src_addr, read_buffer.data(), src_size);
+    memory_manager.ReadBlock(dst_addr, write_buffer.data(), dst_size);
+
+    u8* const slice_start = read_buffer.data() + slice_bytes * tiled.pos_z;
+    Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, tiled.size_x,
+                              bytes_per_pixel, slice_start, write_buffer.data(),
+                              tiled.BlockHeight(), tiled.pos_x, tiled.pos_y);
+
+    memory_manager.WriteBlock(dst_addr, write_buffer.data(), dst_size);
+}
+
+/// Software path for a linear-to-tiled DMA copy: swizzles the pitch-linear source rows into the
+/// tiled destination by way of the read/write staging buffers.
+void MaxwellDMA::LinearTiledCopy(const std::size_t src_size, const std::size_t dst_size,
+                                 const std::size_t bytes_per_pixel) {
+    const auto& tiled = regs.dst_params;
+    const GPUVAddr src_addr = regs.src_address.Address();
+    const GPUVAddr dst_addr = regs.dst_address.Address();
+
+    // Byte size of a single depth slice of the destination; used to step to slice pos_z.
+    const std::size_t slice_bytes =
+        Texture::CalculateSize(true, bytes_per_pixel, tiled.size_x, tiled.size_y, 1,
+                               tiled.BlockHeight(), tiled.BlockDepth());
+
+    // Grow the staging buffers on demand.
+    if (src_size > read_buffer.size()) {
+        read_buffer.resize(src_size);
+    }
+    if (dst_size > write_buffer.size()) {
+        write_buffer.resize(dst_size);
+    }
+
+    // The destination is fetched too because the whole region is written back below.
+    memory_manager.ReadBlock(src_addr, read_buffer.data(), src_size);
+    memory_manager.ReadBlock(dst_addr, write_buffer.data(), dst_size);
+
+    // Place the linear rows at (pos_x, pos_y) inside depth slice pos_z of the tiled image.
+    u8* const slice_start = write_buffer.data() + slice_bytes * tiled.pos_z;
+    Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, tiled.size_x,
+                            tiled.pos_x, tiled.pos_y, bytes_per_pixel, slice_start,
+                            read_buffer.data(), tiled.BlockHeight());
+
+    memory_manager.WriteBlock(dst_addr, write_buffer.data(), dst_size);
+}
+
+/// Packages the current DMA registers into source/destination surface descriptors plus a copy
+/// descriptor, then forwards them to the rasterizer's AccelerateDMATexture.
+void MaxwellDMA::TextureAccelerateDMA(const std::size_t src_size, const std::size_t dst_size,
+                                      const bool src_hit, const bool dst_hit,
+                                      const std::size_t bytes_per_pixel) {
+    // Fills in one side of the copy. A linear side stores its pitch and an extent that reaches
+    // the far edge of the copied rectangle (offset + count); a tiled side stores the raw
+    // tiling parameters instead, since both share the same union storage.
+    const auto describe = [&](SurfaceConfig& surface, const bool cached, const GPUVAddr address,
+                              const std::size_t size, const bool linear, const u32 pitch,
+                              const auto& params) {
+        surface.in_cache = cached;
+        surface.gpu_addr = address;
+        surface.size = size;
+        surface.is_linear = linear;
+        surface.bytes_per_pixel = bytes_per_pixel;
+        if (linear) {
+            surface.pitch = pitch;
+            surface.width = regs.x_count + params.pos_x;
+            surface.height = regs.y_count + params.pos_y;
+        } else {
+            surface.tiled = params;
+        }
+    };
+
+    SurfaceConfig src_config;
+    describe(src_config, src_hit, regs.src_address.Address(), src_size,
+             regs.exec.is_src_linear != 0, regs.src_pitch, regs.src_params);
+
+    SurfaceConfig dst_config;
+    describe(dst_config, dst_hit, regs.dst_address.Address(), dst_size,
+             regs.exec.is_dst_linear != 0, regs.dst_pitch, regs.dst_params);
+
+    // Rectangle extent and the per-side offsets it is read from / written to.
+    CopyConfig copy_config;
+    copy_config.width = regs.x_count;
+    copy_config.height = regs.y_count;
+    copy_config.src_pos_x = regs.src_params.pos_x;
+    copy_config.src_pos_y = regs.src_params.pos_y;
+    copy_config.src_pos_z = regs.src_params.pos_z;
+    copy_config.dst_pos_x = regs.dst_params.pos_x;
+    copy_config.dst_pos_y = regs.dst_params.pos_y;
+    copy_config.dst_pos_z = regs.dst_params.pos_z;
+
+    rasterizer.AccelerateDMATexture(src_config, dst_config, copy_config);
+}
+
 void MaxwellDMA::HandleCopy() {
    LOG_WARNING(HW_GPU, "Requested a DMA copy");

@@ -86,63 +189,44 @@ void MaxwellDMA::HandleCopy() {
    if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
        ASSERT(regs.src_params.size_z == 1);
        // If the input is tiled and the output is linear, deswizzle the input and copy it over.
-        const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
+        const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count;
        const std::size_t src_size = Texture::CalculateSize(
-            true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
+            true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
            regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());

        const std::size_t dst_size = regs.dst_pitch * regs.y_count;

-        if (read_buffer.size() < src_size) {
-            read_buffer.resize(src_size);
+        const u32 src_flags = rasterizer.IsCacheHit(source, src_size);
+        const u32 dst_flags = rasterizer.IsCacheHit(dest, dst_size);
+        const bool src_hit = src_flags == VideoCore::Caches::TextureCache;
+        const bool dst_hit = dst_flags == VideoCore::Caches::TextureCache;
+
+        if (src_hit || dst_hit) {
+            TextureAccelerateDMA(src_size, dst_size, src_hit, dst_hit, bytes_per_pixel);
+        } else {
+            TiledLinearCopy(src_size, dst_size, bytes_per_pixel);
        }
-
-        if (write_buffer.size() < dst_size) {
-            write_buffer.resize(dst_size);
-        }
-
-        memory_manager.ReadBlock(source, read_buffer.data(), src_size);
-        memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
-
-        Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
-                                  regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(),
-                                  write_buffer.data(), regs.src_params.BlockHeight(),
-                                  regs.src_params.pos_x, regs.src_params.pos_y);
-
-        memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
    } else {
        ASSERT(regs.dst_params.BlockDepth() == 0);

-        const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;
+        const u32 bytes_per_pixel = regs.src_pitch / regs.x_count;

        const std::size_t dst_size = Texture::CalculateSize(
-            true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
+            true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
            regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());

-        const std::size_t dst_layer_size = Texture::CalculateSize(
-            true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
-            regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
-
        const std::size_t src_size = regs.src_pitch * regs.y_count;

-        if (read_buffer.size() < src_size) {
-            read_buffer.resize(src_size);
+        const u32 src_flags = rasterizer.IsCacheHit(source, src_size);
+        const u32 dst_flags = rasterizer.IsCacheHit(dest, dst_size);
+        const bool src_hit = src_flags == VideoCore::Caches::TextureCache;
+        const bool dst_hit = dst_flags == VideoCore::Caches::TextureCache;
+
+        if (src_hit || dst_hit) {
+            TextureAccelerateDMA(src_size, dst_size, src_hit, dst_hit, bytes_per_pixel);
+        } else {
+            LinearTiledCopy(src_size, dst_size, bytes_per_pixel);
        }
-
-        if (write_buffer.size() < dst_size) {
-            write_buffer.resize(dst_size);
-        }
-
-        memory_manager.ReadBlock(source, read_buffer.data(), src_size);
-        memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
-
-        // If the input is linear and the output is tiled, swizzle the input and copy it over.
-        Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
-                                src_bytes_per_pixel,
-                                write_buffer.data() + dst_layer_size * regs.dst_params.pos_z,
-                                read_buffer.data(), regs.dst_params.BlockHeight());
-
-        memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
    }
 }

--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -58,6 +58,10 @@ public:
                BitField<16, 16, u32> pos_y;
            };

+            u32 BlockWidth() const { // Accessor for the block_width field of these parameters.
+                return block_width.Value();
+            }
+
            u32 BlockHeight() const {
                return block_height.Value();
            }
@@ -177,6 +181,33 @@ public:
        };
    } regs{};

+    struct SurfaceConfig { // Describes one side (source or destination) of a DMA copy.
+        bool in_cache;       ///< Whether the caller found this region in the texture cache.
+        u32 bytes_per_pixel; ///< Pixel size in bytes, as computed by the DMA engine.
+        GPUVAddr gpu_addr;   ///< GPU virtual address where the surface starts.
+        std::size_t size;    ///< Size of the surface in bytes.
+        bool is_linear;      ///< Selects the active union member: true = linear, false = tiled.
+        union {
+            struct { // Valid only when is_linear is true.
+                u32 pitch;  ///< Pitch taken from the DMA pitch register for this side.
+                u32 width;  ///< Copy width plus the x offset of this side.
+                u32 height; ///< Copy height plus the y offset of this side.
+            };
+            Regs::Parameters tiled; ///< Valid only when is_linear is false.
+        };
+    };
+
+    struct CopyConfig { // Rectangle to copy and its position inside each surface.
+        u32 src_pos_x; ///< X offset of the rectangle in the source.
+        u32 src_pos_y; ///< Y offset of the rectangle in the source.
+        u32 src_pos_z; ///< Z offset (slice) in the source.
+        u32 dst_pos_x; ///< X offset of the rectangle in the destination.
+        u32 dst_pos_y; ///< Y offset of the rectangle in the destination.
+        u32 dst_pos_z; ///< Z offset (slice) in the destination.
+        u32 width;     ///< Rectangle width, taken from the x_count register.
+        u32 height;    ///< Rectangle height, taken from the y_count register.
+    };
+
 private:
    Core::System& system;

@@ -187,6 +218,11 @@ private:
    std::vector<u8> read_buffer;
    std::vector<u8> write_buffer;

+    void TiledLinearCopy(std::size_t src_size, std::size_t dst_size, std::size_t bytes_per_pixel);
+    void LinearTiledCopy(std::size_t src_size, std::size_t dst_size, std::size_t bytes_per_pixel);
+    void TextureAccelerateDMA(std::size_t src_size, std::size_t dst_size, bool src_hit,
+                              bool dst_hit, std::size_t bytes_per_pixel);
+
    /// Performs the copy from the source buffer to the destination buffer as configured in the
    /// registers.
    void HandleCopy();
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -8,6 +8,7 @@
 #include <functional>
 #include "common/common_types.h"
 #include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/maxwell_dma.h"
 #include "video_core/gpu.h"

 namespace Tegra {
@@ -22,6 +23,13 @@ enum class LoadCallbackStage {
    Build,
    Complete,
 };
+
+enum Caches : u32 { // Bit flags (one distinct bit each) combined into the IsCacheHit result.
+    TextureCache = 1, ///< Bit 0.
+    BufferCache = 2,  ///< Bit 1.
+    ShaderCache = 4,  ///< Bit 2.
+};
+
 using DiskResourceLoadCallback = std::function<void(LoadCallbackStage, std::size_t, std::size_t)>;

 class RasterizerInterface {
@@ -47,6 +55,11 @@ public:
    /// and invalidated
    virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;

+    /// Checks if the memory address and size is within any of the caches of the GPU.
+    /// The result will be a flag variable based on VideoCore::Caches, turning on
+    /// corresponding bits for caches that were hit.
+    virtual u32 IsCacheHit(GPUVAddr gpu_addr, std::size_t size) = 0;
+
    /// Notify rasterizer that a frame is about to finish
    virtual void TickFrame() = 0;

@@ -57,6 +70,10 @@ public:
        return false;
    }

+    virtual void AccelerateDMATexture(const Tegra::Engines::MaxwellDMA::SurfaceConfig& src_config,
+                                      const Tegra::Engines::MaxwellDMA::SurfaceConfig& dst_config,
+                                      const Tegra::Engines::MaxwellDMA::CopyConfig& copy_config) {} // No-op unless overridden.
+
    /// Attempt to use a faster method to display the framebuffer to screen
    virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                                   u32 pixel_stride) {
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -811,6 +811,14 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
    InvalidateRegion(addr, size);
 }

+/// Returns a VideoCore::Caches bit mask for the given region. Only the texture cache is
+/// consulted here, so the result is either the TextureCache bit or zero.
+u32 RasterizerOpenGL::IsCacheHit(const GPUVAddr gpu_addr, const std::size_t size) {
+    const bool texture_hit = texture_cache.IsHit(gpu_addr, size);
+    return texture_hit ? static_cast<u32>(VideoCore::Caches::TextureCache) : 0U;
+}
+
 void RasterizerOpenGL::TickFrame() {
    buffer_cache.TickFrame();
 }
@@ -823,6 +831,13 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs
    return true;
 }

+void RasterizerOpenGL::AccelerateDMATexture(
+    const Tegra::Engines::MaxwellDMA::SurfaceConfig& src_config,
+    const Tegra::Engines::MaxwellDMA::SurfaceConfig& dst_config,
+    const Tegra::Engines::MaxwellDMA::CopyConfig& copy_config) {
+    texture_cache.AccelerateDMA(src_config, dst_config, copy_config); // Forwarded unchanged.
+}
+
 bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
                                         VAddr framebuffer_addr, u32 pixel_stride) {
    if (!framebuffer_addr) {
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -62,10 +62,14 @@ public:
    void FlushRegion(CacheAddr addr, u64 size) override;
    void InvalidateRegion(CacheAddr addr, u64 size) override;
    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+    u32 IsCacheHit(GPUVAddr gpu_addr, std::size_t size) override;
    void TickFrame() override;
    bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                               const Tegra::Engines::Fermi2D::Regs::Surface& dst,
                               const Tegra::Engines::Fermi2D::Config& copy_config) override;
+    void AccelerateDMATexture(const Tegra::Engines::MaxwellDMA::SurfaceConfig& src_config,
+                              const Tegra::Engines::MaxwellDMA::SurfaceConfig& dst_config,
+                              const Tegra::Engines::MaxwellDMA::CopyConfig& copy_config) override;
    bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                           u32 pixel_stride) override;
    bool AccelerateDrawBatch(bool is_indexed) override;
--- a/src/video_core/texture_cache/copy_params.h
+++ b/src/video_core/texture_cache/copy_params.h
@@ -9,6 +9,7 @@
 namespace VideoCommon {

 struct CopyParams {
+    constexpr CopyParams() = default;
    constexpr CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y,
                         u32 dest_z, u32 source_level, u32 dest_level, u32 width, u32 height,
                         u32 depth)
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -202,6 +202,39 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
    return params;
 }

+/// Builds the SurfaceParams for one side of a MaxwellDMA copy.
+/// Tiled configs take width/height/depth from the DMA tiling parameters and derive the pitch as
+/// width * bytes_per_pixel; linear configs use the explicit width/height/pitch with depth 1.
+/// Component type, pixel format and target are supplied by the caller. The result always has a
+/// single mip level.
+SurfaceParams SurfaceParams::CreateForDMASurface(
+    const Tegra::Engines::MaxwellDMA::SurfaceConfig& config,
+    const VideoCore::Surface::ComponentType component_type,
+    const VideoCore::Surface::PixelFormat pixel_format,
+    const VideoCore::Surface::SurfaceTarget target) {
+    SurfaceParams params{};
+    params.is_tiled = !config.is_linear;
+    params.srgb_conversion = false;
+    // Block dimensions are only read for tiled surfaces (config.tiled is the inactive union
+    // member otherwise) and are clamped to at most 5. Each assignment is its own statement;
+    // they were previously chained together with the comma operator by mistake.
+    params.block_width = params.is_tiled ? std::min(config.tiled.BlockWidth(), 5U) : 0;
+    params.block_height = params.is_tiled ? std::min(config.tiled.BlockHeight(), 5U) : 0;
+    params.block_depth = params.is_tiled ? std::min(config.tiled.BlockDepth(), 5U) : 0;
+    params.tile_width_spacing = 1;
+    params.pixel_format = pixel_format;
+    params.component_type = component_type;
+    params.type = GetFormatType(pixel_format);
+    params.target = target;
+    if (params.is_tiled) {
+        params.depth = config.tiled.size_z;
+        params.width = config.tiled.size_x;
+        params.height = config.tiled.size_y;
+        params.pitch = params.width * config.bytes_per_pixel;
+    } else {
+        params.depth = 1;
+        params.width = config.width;
+        params.height = config.height;
+        params.pitch = config.pitch;
+    }
+    params.num_levels = 1;
+    params.emulated_levels = 1;
+    params.is_layered = params.IsLayered();
+    return params;
+}
+
 bool SurfaceParams::IsLayered() const {
    switch (target) {
    case SurfaceTarget::Texture1DArray:
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -12,6 +12,7 @@
 #include "common/common_types.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_dma.h"
 #include "video_core/shader/shader_ir.h"
 #include "video_core/surface.h"
 #include "video_core/textures/decoders.h"
@@ -40,6 +41,12 @@ public:
    static SurfaceParams CreateForFermiCopySurface(
        const Tegra::Engines::Fermi2D::Regs::Surface& config);

+    /// Creates SurfaceCachedParams from a MaxwellDMA surface configuration.
+    static SurfaceParams CreateForDMASurface(
+        const Tegra::Engines::MaxwellDMA::SurfaceConfig& config,
+        VideoCore::Surface::ComponentType component_type,
+        VideoCore::Surface::PixelFormat pixel_format, VideoCore::Surface::SurfaceTarget target);
+
    std::size_t Hash() const {
        return static_cast<std::size_t>(
            Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -215,6 +215,50 @@ public:
        dst_surface.first->MarkAsModified(true, Tick());
    }

+    /// Performs a MaxwellDMA copy between two surfaces obtained through the texture cache.
+    /// The side(s) flagged in_cache supply the pixel format and component type; a side that is
+    /// not cached inherits them from the cached side and gets its target from FigureDMATarget.
+    /// Callers are expected to pass at least one cached side.
+    /// @param src_config  Description of the source surface.
+    /// @param dst_config  Description of the destination surface.
+    /// @param copy_config Rectangle extent and the offsets inside each surface.
+    void AccelerateDMA(const Tegra::Engines::MaxwellDMA::SurfaceConfig& src_config,
+                       const Tegra::Engines::MaxwellDMA::SurfaceConfig& dst_config,
+                       const Tegra::Engines::MaxwellDMA::CopyConfig& copy_config) {
+        DMAInfo src_info{};
+        DMAInfo dst_info{};
+        // Both flags must be tested here. Testing the source twice sent a source-only hit down
+        // this branch and ran ExploreDMA on a destination that is not in the cache.
+        if (src_config.in_cache && dst_config.in_cache) {
+            src_info = ExploreDMA(src_config);
+            dst_info = ExploreDMA(dst_config);
+        } else if (src_config.in_cache) {
+            // Only the source is cached: the destination adopts the source's formats.
+            src_info = ExploreDMA(src_config);
+            dst_info.pixel_format = src_info.pixel_format;
+            dst_info.component_type = src_info.component_type;
+            dst_info.target = FigureDMATarget(dst_config);
+        } else {
+            // Only the destination is cached: the source adopts the destination's formats.
+            dst_info = ExploreDMA(dst_config);
+            src_info.pixel_format = dst_info.pixel_format;
+            src_info.component_type = dst_info.component_type;
+            src_info.target = FigureDMATarget(src_config);
+        }
+        TSurface src_surface = GetDMASurface(src_config, src_info).first;
+        TSurface dst_surface = GetDMASurface(dst_config, dst_info).first;
+        const auto& src_params = src_surface->GetSurfaceParams();
+        const auto& dst_params = dst_surface->GetSurfaceParams();
+        // Surfaces of different types take the buffer copy path instead of an image copy.
+        if (src_params.type != dst_params.type) {
+            BufferCopy(src_surface, dst_surface);
+            return;
+        }
+        CopyParams copy_params{};
+        copy_params.source_x = copy_config.src_pos_x;
+        copy_params.source_y = copy_config.src_pos_y;
+        copy_params.source_z = copy_config.src_pos_z;
+        copy_params.dest_x = copy_config.dst_pos_x;
+        copy_params.dest_y = copy_config.dst_pos_y;
+        copy_params.dest_z = copy_config.dst_pos_z;
+        copy_params.source_level = 0;
+        copy_params.dest_level = 0;
+        copy_params.width = copy_config.width;
+        copy_params.height = copy_config.height;
+        copy_params.depth = 1;
+        ImageCopy(src_surface, dst_surface, copy_params);
+    }
+
    TSurface TryFindFramebufferSurface(const u8* host_ptr) {
        const CacheAddr cache_addr = ToCacheAddr(host_ptr);
        if (!cache_addr) {
@@ -234,6 +278,26 @@ public:
        return ++ticks;
    }

+    /// Reports whether the given GPU region touches the texture cache: true when a surface is
+    /// registered in l1_cache at the region's start address, or when GetSurfacesInRegion finds
+    /// at least one surface for it. An address that yields a null cache address is a miss.
+    bool IsHit(const GPUVAddr gpu_addr, const std::size_t size) {
+        std::lock_guard lock{mutex};
+        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
+        const auto cache_addr{ToCacheAddr(host_ptr)};
+        if (!cache_addr) {
+            return false;
+        }
+
+        // Cheap exact-address lookup first; the region scan only runs when it misses.
+        if (l1_cache.find(cache_addr) != l1_cache.end()) {
+            return true;
+        }
+        return !GetSurfacesInRegion(cache_addr, size).empty();
+    }
+
 protected:
    TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
        : system{system}, rasterizer{rasterizer} {
@@ -344,6 +408,12 @@ private:
        BufferCopy = 3,
    };

+    struct DMAInfo { // Format information used to build the SurfaceParams of a DMA surface.
+        SurfaceTarget target;                             ///< Surface target to create or look up.
+        PixelFormat pixel_format;                         ///< Pixel format of the surface.
+        VideoCore::Surface::ComponentType component_type; ///< Component type of the surface.
+    };
+
    /**
     * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle.
     * @param overlaps, the overlapping surfaces registered in the cache.
@@ -773,6 +843,67 @@ private:
        return siblings_table[static_cast<std::size_t>(format)];
    }

+    /// Derives the target, pixel format and component type for one side of a DMA copy from
+    /// surfaces already registered in the cache.
+    /// A surface registered at exactly this address whose layout (tiled/linear) and dimensions
+    /// match the request is used as-is. Otherwise the first surface overlapping the region
+    /// supplies the formats and the target is Texture2DArray. If nothing overlaps, a
+    /// value-initialized DMAInfo is returned.
+    DMAInfo ExploreDMA(const Tegra::Engines::MaxwellDMA::SurfaceConfig& config) {
+        const auto host_ptr{system.GPU().MemoryManager().GetPointer(config.gpu_addr)};
+        const auto cache_addr{ToCacheAddr(host_ptr)};
+        DMAInfo dma_info{};
+
+        const auto it = l1_cache.find(cache_addr);
+        if (it != l1_cache.end()) {
+            const auto& params = it->second->GetSurfaceParams();
+            if (params.is_tiled == !config.is_linear) {
+                // Tiled requests are matched on width/height/depth (the depth was previously
+                // compared against the pitch); linear ones on width/height/pitch.
+                const bool same_dimensions =
+                    params.is_tiled
+                        ? (config.tiled.size_x == params.width &&
+                           config.tiled.size_y == params.height &&
+                           config.tiled.size_z == params.depth)
+                        : (config.width == params.width && config.height == params.height &&
+                           config.pitch == params.pitch);
+                if (same_dimensions) {
+                    // Return right away so the overlap fallback below cannot overwrite the
+                    // exact match.
+                    dma_info.target = params.target;
+                    dma_info.component_type = params.component_type;
+                    dma_info.pixel_format = params.pixel_format;
+                    return dma_info;
+                }
+            }
+        }
+
+        auto overlaps{GetSurfacesInRegion(cache_addr, config.size)};
+        if (overlaps.empty()) {
+            // Nothing to borrow formats from; indexing the empty result would be undefined.
+            return dma_info;
+        }
+        const auto& params = overlaps[0]->GetSurfaceParams();
+        dma_info.target = SurfaceTarget::Texture2DArray;
+        dma_info.component_type = params.component_type;
+        dma_info.pixel_format = params.pixel_format;
+        return dma_info;
+    }
+
+    /// Builds the SurfaceParams for a DMA surface from its config and format info, then
+    /// resolves it through GetSurface at the config's GPU address.
+    std::pair<TSurface, TView> GetDMASurface(
+        const Tegra::Engines::MaxwellDMA::SurfaceConfig& config, const DMAInfo& info) {
+        SurfaceParams dma_params = SurfaceParams::CreateForDMASurface(
+            config, info.component_type, info.pixel_format, info.target);
+        return GetSurface(config.gpu_addr, dma_params, true, false);
+    }
+
+    /// Chooses a surface target for a DMA surface that has no cached counterpart.
+    /// Linear surfaces and tiled surfaces with size_z <= 1 are Texture2D. Deeper tiled surfaces
+    /// are Texture3D when their block depth is non-zero and Texture2DArray otherwise.
+    SurfaceTarget FigureDMATarget(const Tegra::Engines::MaxwellDMA::SurfaceConfig& config) {
+        // The tiled parameters are only inspected when the surface is not linear.
+        if (config.is_linear || config.tiled.size_z <= 1) {
+            return SurfaceTarget::Texture2D;
+        }
+        const u32 depth_blocks = config.tiled.BlockDepth();
+        return depth_blocks > 0 ? SurfaceTarget::Texture3D : SurfaceTarget::Texture2DArray;
+    }
+
    struct FramebufferTargetInfo {
        TSurface target;
        TView view;
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -256,20 +256,23 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y,
 }

 void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
-                    u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
-                    u32 block_height_bit) {
+                    u32 dst_x, u32 dst_y, u32 bytes_per_pixel, u8* swizzled_data,
+                    u8* unswizzled_data, u32 block_height_bit) {
    const u32 block_height = 1U << block_height_bit;
    const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
                                  gob_size_x};
    for (u32 line = 0; line < subrect_height; ++line) {
+        const u32 dst_line = line + dst_y;
        const u32 gob_address_y =
-            (line / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs +
-            ((line % (gob_size_y * block_height)) / gob_size_y) * gob_size;
+            (dst_line / (gob_size_y * block_height)) * gob_size * block_height *
+                image_width_in_gobs +
+            ((dst_line % (gob_size_y * block_height)) / gob_size_y) * gob_size;
        const auto& table = legacy_swizzle_table[line % gob_size_y];
        for (u32 x = 0; x < subrect_width; ++x) {
+            const u32 x2 = x + dst_x;
            const u32 gob_address =
-                gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
-            const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x];
+                gob_address_y + (x2 * bytes_per_pixel / gob_size_x) * gob_size * block_height;
+            const u32 swizzled_offset = gob_address + table[(x2 * bytes_per_pixel) % gob_size_x];
            u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
            u8* dest_addr = swizzled_data + swizzled_offset;

--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -44,7 +44,8 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height

 /// Copies an untiled subrectangle into a tiled surface.
 void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
-                    u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height);
+                    u32 dst_x, u32 dst_y, u32 bytes_per_pixel, u8* swizzled_data,
+                    u8* unswizzled_data, u32 block_height);

 /// Copies a tiled subrectangle into a linear surface.
 void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
Author	SHA1	Message	Date
Fernando Sahmkow	e9deeb5a4c	MaxwellDMA: Correct DMA for copies with offset.	2019-07-24 01:13:08 -04:00
Fernando Sahmkow	474d81f1c9	TextureCache: Implement Accelerate DMA	2019-07-24 00:14:05 -04:00
Fernando Sahmkow	9cb2e3603f	MaxwellDMA: Setup options for Accelerate DMA.	2019-07-23 16:18:46 -04:00