maxwell_3d: reduce severity of different component formats assert.

The severity was reduced because the assert fired in most games, and at such a constant rate that it heavily affected performance for the end user. In general, we are well aware of the assert, and an implementation is already planned.
Merge pull request #2462 from lioncash/video-mm
2019-05-14 17:12:54 -04:00 · 2019-05-14 06:40:33 -04:00 · 2019-05-14 06:36:26 -04:00 · 2019-05-14 06:34:53 -04:00 · 2019-05-14 06:34:05 -04:00 · 2019-05-13 23:01:59 -03:00
43 changed files with 627 additions and 262 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,7 +24,7 @@ matrix:
    - os: osx
      env: NAME="macos build"
      sudo: false
-      osx_image: xcode10.1
+      osx_image: xcode10.2
      install: "./.travis/macos/deps.sh"
      script: "./.travis/macos/build.sh"
      after_success: "./.travis/macos/upload.sh"
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -7,6 +7,10 @@ include(DownloadExternals)
 add_library(catch-single-include INTERFACE)
 target_include_directories(catch-single-include INTERFACE catch/single_include)

+# libfmt
+add_subdirectory(fmt)
+add_library(fmt::fmt ALIAS fmt)
+
 # Dynarmic
 if (ARCHITECTURE_x86_64)
    set(DYNARMIC_TESTS OFF)
@@ -14,10 +18,6 @@ if (ARCHITECTURE_x86_64)
    add_subdirectory(dynarmic)
 endif()

-# libfmt
-add_subdirectory(fmt)
-add_library(fmt::fmt ALIAS fmt)
-
 # getopt
 if (MSVC)
    add_subdirectory(getopt)
--- a/externals/dynarmic
+++ b/externals/dynarmic
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -21,15 +21,29 @@ if (MSVC)
    # Ensure that projects build with Unicode support.
    add_definitions(-DUNICODE -D_UNICODE)

-    # /W3 - Level 3 warnings
-    # /MP - Multi-threaded compilation
-    # /Zi - Output debugging information
-    # /Zo - enhanced debug info for optimized builds
-    # /permissive- - enables stricter C++ standards conformance checks
-    # /EHsc - C++-only exception handling semantics
-    # /Zc:throwingNew - let codegen assume `operator new` will never return null
-    # /Zc:inline - let codegen omit inline functions in object files
-    add_compile_options(/W3 /MP /Zi /Zo /permissive- /EHsc /std:c++latest /Zc:throwingNew,inline)
+    # /W3                 - Level 3 warnings
+    # /MP                 - Multi-threaded compilation
+    # /Zi                 - Output debugging information
+    # /Zo                 - Enhanced debug info for optimized builds
+    # /permissive-        - Enables stricter C++ standards conformance checks
+    # /EHsc               - C++-only exception handling semantics
+    # /volatile:iso       - Use strict standards-compliant volatile semantics.
+    # /Zc:externConstexpr - Allow extern constexpr variables to have external linkage, like the standard mandates
+    # /Zc:inline          - Let codegen omit inline functions in object files
+    # /Zc:throwingNew     - Let codegen assume `operator new` (without std::nothrow) will never return null
+    add_compile_options(
+        /W3
+        /MP
+        /Zi
+        /Zo
+        /permissive-
+        /EHsc
+        /std:c++latest
+        /volatile:iso
+        /Zc:externConstexpr
+        /Zc:inline
+        /Zc:throwingNew
+    )

    # /GS- - No stack buffer overflow checks
    add_compile_options("$<$<CONFIG:Release>:/GS->")
@@ -37,7 +51,10 @@ if (MSVC)
    set(CMAKE_EXE_LINKER_FLAGS_DEBUG   "/DEBUG /MANIFEST:NO" CACHE STRING "" FORCE)
    set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE)
 else()
-    add_compile_options("-Wno-attributes")
+    add_compile_options(
+        -Wall
+        -Wno-attributes
+    )

    if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang)
        add_compile_options("-stdlib=libc++")
--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -10,6 +10,8 @@

 namespace Core::Frontend {

+GraphicsContext::~GraphicsContext() = default;
+
 class EmuWindow::TouchState : public Input::Factory<Input::TouchDevice>,
                              public std::enable_shared_from_this<TouchState> {
 public:
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -19,6 +19,8 @@ namespace Core::Frontend {
 */
 class GraphicsContext {
 public:
+    virtual ~GraphicsContext();
+
    /// Makes the graphics context current for the caller thread
    virtual void MakeCurrent() = 0;

--- a/src/core/hle/service/audio/audctl.cpp
+++ b/src/core/hle/service/audio/audctl.cpp
@@ -50,7 +50,7 @@ void AudCtl::GetTargetVolumeMin(Kernel::HLERequestContext& ctx) {
    LOG_DEBUG(Audio, "called.");

    // This service function is currently hardcoded on the
-    // actual console to this value (as of 6.0.0).
+    // actual console to this value (as of 8.0.0).
    constexpr s32 target_min_volume = 0;

    IPC::ResponseBuilder rb{ctx, 3};
@@ -62,7 +62,7 @@ void AudCtl::GetTargetVolumeMax(Kernel::HLERequestContext& ctx) {
    LOG_DEBUG(Audio, "called.");

    // This service function is currently hardcoded on the
-    // actual console to this value (as of 6.0.0).
+    // actual console to this value (as of 8.0.0).
    constexpr s32 target_max_volume = 15;

    IPC::ResponseBuilder rb{ctx, 3};
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -72,15 +72,6 @@ u8* GetPointer(VAddr vaddr);

 std::string ReadCString(VAddr vaddr, std::size_t max_length);

-enum class FlushMode {
-    /// Write back modified surfaces to RAM
-    Flush,
-    /// Remove region from the cache
-    Invalidate,
-    /// Write back modified surfaces to RAM, and also remove them from the cache
-    FlushAndInvalidate,
-};
-
 /**
 * Mark each page touching the region as cached.
 */
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -102,12 +102,6 @@ bool VerifyLogin(const std::string& username, const std::string& token) {
 }

 TelemetrySession::TelemetrySession() {
-#ifdef ENABLE_WEB_SERVICE
-    backend = std::make_unique<WebService::TelemetryJson>(
-        Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
-#else
-    backend = std::make_unique<Telemetry::NullVisitor>();
-#endif
    // Log one-time top-level information
    AddField(Telemetry::FieldType::None, "TelemetryId", GetTelemetryId());

@@ -175,9 +169,14 @@ TelemetrySession::~TelemetrySession() {
                                .count()};
    AddField(Telemetry::FieldType::Session, "Shutdown_Time", shutdown_time);

+#ifdef ENABLE_WEB_SERVICE
+    auto backend = std::make_unique<WebService::TelemetryJson>(
+        Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
+#else
+    auto backend = std::make_unique<Telemetry::NullVisitor>();
+#endif
+
    // Complete the session, submitting to web service if necessary
-    // This is just a placeholder to wrap up the session once the core completes and this is
-    // destroyed. This will be moved elsewhere once we are actually doing real I/O with the service.
    field_collection.Accept(*backend);
    if (Settings::values.enable_telemetry)
        backend->Complete();
@@ -186,6 +185,8 @@ TelemetrySession::~TelemetrySession() {

 bool TelemetrySession::SubmitTestcase() {
 #ifdef ENABLE_WEB_SERVICE
+    auto backend = std::make_unique<WebService::TelemetryJson>(
+        Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
    field_collection.Accept(*backend);
    return backend->SubmitTestcase();
 #else
--- a/src/core/telemetry_session.h
+++ b/src/core/telemetry_session.h
@@ -39,7 +39,6 @@ public:

 private:
    Telemetry::FieldCollection field_collection; ///< Tracks all added fields for the session
-    std::unique_ptr<Telemetry::VisitorInterface> backend; ///< Backend interface that logs fields
 };

 /**
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -3,6 +3,8 @@ add_library(video_core STATIC
    dma_pusher.h
    debug_utils/debug_utils.cpp
    debug_utils/debug_utils.h
+    engines/engine_upload.cpp
+    engines/engine_upload.h
    engines/fermi_2d.cpp
    engines/fermi_2d.h
    engines/kepler_compute.cpp
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -0,0 +1,63 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cstring>
+#include "common/assert.h"
+#include "video_core/engines/engine_upload.h"
+#include "video_core/memory_manager.h"
+#include "video_core/textures/decoders.h"
+
+namespace Tegra::Engines::Upload {
+
+/// Binds the upload state to the memory manager it writes through and to the register block
+/// it reads. Initializers follow the member declaration order (regs, then memory_manager).
+State::State(MemoryManager& memory_manager, Registers& regs)
+    : regs(regs), memory_manager(memory_manager) {}
+
+/// Begins a new upload: rewinds the write cursor, sizes the staging buffer to
+/// line_length_in * line_count bytes and latches whether the destination is linear.
+void State::ProcessExec(const bool is_linear) {
+    write_offset = 0;
+    copy_size = regs.line_length_in * regs.line_count;
+    inner_buffer.resize(copy_size);
+    this->is_linear = is_linear;
+}
+
+/// Stages one 32-bit data word and, when is_last_call is set, writes the staged bytes to the
+/// destination GPU address (directly if linear, merged into the surface otherwise).
+void State::ProcessData(const u32 data, const bool is_last_call) {
+    // Take only the bytes still missing from the upload (at most the 4 bytes of the word).
+    const u32 sub_copy_size = std::min(4U, copy_size - write_offset);
+    if (sub_copy_size != 0) {
+        // Skipped for an empty or already complete upload, so the buffer is never indexed
+        // at or past its end.
+        std::memcpy(inner_buffer.data() + write_offset, &data, sub_copy_size);
+        write_offset += sub_copy_size;
+    }
+    if (!is_last_call) {
+        return;
+    }
+    const GPUVAddr address{regs.dest.Address()};
+    if (is_linear) {
+        memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
+    } else {
+        UNIMPLEMENTED_IF(regs.dest.z != 0);
+        UNIMPLEMENTED_IF(regs.dest.depth != 1);
+        UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
+        UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
+        // Read-modify-write: fetch the destination surface, let SwizzleKepler merge the staged
+        // bytes into it, then write the whole surface back.
+        const std::size_t dst_size = Tegra::Texture::CalculateSize(
+            true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
+        tmp_buffer.resize(dst_size);
+        memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
+        Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y,
+                                      regs.dest.BlockHeight(), copy_size, inner_buffer.data(),
+                                      tmp_buffer.data());
+        memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
+    }
+}
+
+} // namespace Tegra::Engines::Upload
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -0,0 +1,75 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <vector>
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace Tegra::Engines::Upload {
+
+struct Registers { // Upload register block embedded as `upload` in the engines' register files.
+    u32 line_length_in; ///< Multiplied by line_count to get the upload size in bytes
+    u32 line_count;     ///< Multiplied by line_length_in to get the upload size in bytes
+
+    struct { // Destination description, exposed as `dest`.
+        u32 address_high; ///< Upper 32 bits of the destination GPU address
+        u32 address_low;  ///< Lower 32 bits of the destination GPU address
+        u32 pitch;
+        union {
+            BitField<0, 4, u32> block_width;  ///< Stored as log2, see BlockWidth()
+            BitField<4, 4, u32> block_height; ///< Stored as log2, see BlockHeight()
+            BitField<8, 4, u32> block_depth;  ///< Stored as log2, see BlockDepth()
+        };
+        u32 width;
+        u32 height;
+        u32 depth; ///< State::ProcessData only implements depth == 1
+        u32 z;     ///< State::ProcessData only implements z == 0
+        u32 x;
+        u32 y;
+
+        GPUVAddr Address() const { // Combines address_high/address_low into one 64-bit address.
+            return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
+        }
+
+        u32 BlockWidth() const { // Decodes the log2-encoded field: 1 << block_width.
+            return 1U << block_width.Value();
+        }
+
+        u32 BlockHeight() const { // Decodes the log2-encoded field: 1 << block_height.
+            return 1U << block_height.Value();
+        }
+
+        u32 BlockDepth() const { // Decodes the log2-encoded field: 1 << block_depth.
+            return 1U << block_depth.Value();
+        }
+    } dest;
+};
+
+class State { // Upload state driven by an engine's exec/data upload methods.
+public:
+    State(MemoryManager& memory_manager, Registers& regs);
+    ~State() = default;
+
+    void ProcessExec(const bool is_linear); // Starts a new upload and sizes the staging buffer.
+    void ProcessData(const u32 data, const bool is_last_call); // Stages one word; flushes on last.
+
+private:
+    u32 write_offset = 0;         ///< Byte offset in inner_buffer for the next data word
+    u32 copy_size = 0;            ///< Total upload size in bytes (line_length_in * line_count)
+    std::vector<u8> inner_buffer; ///< Staging buffer for the uploaded bytes
+    std::vector<u8> tmp_buffer;   ///< Scratch copy of the destination for non-linear uploads
+    bool is_linear = false;       ///< Destination layout flag latched by ProcessExec()
+    Registers& regs;              ///< Upload registers this state reads its parameters from
+    MemoryManager& memory_manager; ///< Used to read and write the destination GPU memory
+};
+
+} // namespace Tegra::Engines::Upload
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -21,6 +21,12 @@ class RasterizerInterface;

 namespace Tegra::Engines {

+/**
+ * This Engine is known as G80_2D. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
+ */
+
 #define FERMI2D_REG_INDEX(field_name)                                                              \
    (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))

--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -4,12 +4,21 @@

 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"
+#include "video_core/textures/decoders.h"

 namespace Tegra::Engines {

-KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {}
+KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                             MemoryManager& memory_manager)
+    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{
+                                                                                  memory_manager,
+                                                                                  regs.upload} {}

 KeplerCompute::~KeplerCompute() = default;

@@ -20,14 +29,34 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
    regs.reg_array[method_call.method] = method_call.argument;

    switch (method_call.method) {
+    case KEPLER_COMPUTE_REG_INDEX(exec_upload): {
+        upload_state.ProcessExec(regs.exec_upload.linear != 0);
+        break;
+    }
+    case KEPLER_COMPUTE_REG_INDEX(data_upload): {
+        const bool is_last_call = method_call.IsLastCall();
+        upload_state.ProcessData(method_call.argument, is_last_call);
+        if (is_last_call) {
+            system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+        }
+        break;
+    }
    case KEPLER_COMPUTE_REG_INDEX(launch):
-        // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA
-        // kernels)
-        UNREACHABLE_MSG("Compute shaders are not implemented");
+        ProcessLaunch();
        break;
    default:
        break;
    }
 }

+void KeplerCompute::ProcessLaunch() {
+    // Stub: reads the launch descriptor into launch_description and logs the kernel address.
+    const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
+    memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
+                                   LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
+
+    const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start;
+    LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc);
+}
+
 } // namespace Tegra::Engines
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -6,22 +6,40 @@

 #include <array>
 #include <cstddef>
+#include <vector>
+#include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
+#include "video_core/engines/engine_upload.h"
 #include "video_core/gpu.h"

+namespace Core {
+class System;
+}
+
 namespace Tegra {
 class MemoryManager;
 }

+namespace VideoCore {
+class RasterizerInterface;
+}
+
 namespace Tegra::Engines {

+/**
+ * This Engine is known as GK104_Compute. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h
+ */
+
 #define KEPLER_COMPUTE_REG_INDEX(field_name)                                                       \
    (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))

 class KeplerCompute final {
 public:
-    explicit KeplerCompute(MemoryManager& memory_manager);
+    explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                           MemoryManager& memory_manager);
    ~KeplerCompute();

    static constexpr std::size_t NumConstBuffers = 8;
@@ -31,30 +49,181 @@ public:

        union {
            struct {
-                INSERT_PADDING_WORDS(0xAF);
+                INSERT_PADDING_WORDS(0x60);
+
+                Upload::Registers upload;
+
+                struct {
+                    union {
+                        BitField<0, 1, u32> linear;
+                    };
+                } exec_upload;
+
+                u32 data_upload;
+
+                INSERT_PADDING_WORDS(0x3F);
+
+                struct {
+                    u32 address;
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8));
+                    }
+                } launch_desc_loc;
+
+                INSERT_PADDING_WORDS(0x1);

                u32 launch;

-                INSERT_PADDING_WORDS(0xC48);
+                INSERT_PADDING_WORDS(0x4A7);
+
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+                    u32 limit;
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } tsc;
+
+                INSERT_PADDING_WORDS(0x3);
+
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+                    u32 limit;
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } tic;
+
+                INSERT_PADDING_WORDS(0x22);
+
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } code_loc;
+
+                INSERT_PADDING_WORDS(0x3FE);
+
+                u32 texture_const_buffer_index;
+
+                INSERT_PADDING_WORDS(0x374);
            };
            std::array<u32, NUM_REGS> reg_array;
        };
    } regs{};
+
+    struct LaunchParams { // Launch descriptor that ProcessLaunch() reads from GPU memory.
+        static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40; ///< Size in 32-bit words
+
+        INSERT_PADDING_WORDS(0x8);
+
+        u32 program_start; ///< Word 0x8; added to regs.code_loc.Address() by ProcessLaunch()
+
+        INSERT_PADDING_WORDS(0x2);
+
+        BitField<30, 1, u32> linked_tsc; ///< Word 0xB
+
+        BitField<0, 31, u32> grid_dim_x; ///< Word 0xC
+        union { // Word 0xD: two 16-bit fields packed into one word.
+            BitField<0, 16, u32> grid_dim_y;
+            BitField<16, 16, u32> grid_dim_z;
+        };
+
+        INSERT_PADDING_WORDS(0x3);
+
+        BitField<0, 16, u32> shared_alloc; ///< Word 0x11
+
+        BitField<0, 31, u32> block_dim_x; ///< Word 0x12
+        union { // Word 0x13: two 16-bit fields packed into one word.
+            BitField<0, 16, u32> block_dim_y;
+            BitField<16, 16, u32> block_dim_z;
+        };
+
+        union { // Word 0x14.
+            BitField<0, 8, u32> const_buffer_enable_mask;
+            BitField<29, 2, u32> cache_layout;
+        } memory_config;
+
+        INSERT_PADDING_WORDS(0x8);
+
+        struct { // Two words per entry, starting at word 0x1D.
+            u32 address_low; ///< Lower 32 bits of the address
+            union {
+                BitField<0, 8, u32> address_high; ///< Bits 32-39 of the address
+                BitField<15, 17, u32> size;
+            };
+            GPUVAddr Address() const { // Combines address_high/address_low into one address.
+                return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) |
+                                             address_low);
+            }
+        } const_buffer_config[8];
+
+        union { // Word 0x2D.
+            BitField<0, 20, u32> local_pos_alloc;
+            BitField<27, 5, u32> barrier_alloc;
+        };
+
+        union { // Word 0x2E.
+            BitField<0, 20, u32> local_neg_alloc;
+            BitField<24, 5, u32> gpr_alloc;
+        };
+
+        INSERT_PADDING_WORDS(0x11);
+    } launch_description;
+
+    struct { // NOTE(review): not referenced in the code visible here; confirm it is still needed.
+        u32 write_offset = 0;
+        u32 copy_size = 0;
+        std::vector<u8> inner_buffer;
+    } state{}; // NOTE(review): same field names as Upload::State, which upload_state provides.
+
    static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
                  "KeplerCompute Regs has wrong size");

+    static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32),
+                  "KeplerCompute LaunchParams has wrong size");
+
    /// Write the value to the register identified by method.
    void CallMethod(const GPU::MethodCall& method_call);

 private:
+    Core::System& system;
+    VideoCore::RasterizerInterface& rasterizer;
    MemoryManager& memory_manager;
+    Upload::State upload_state;
+
+    void ProcessLaunch();
 };

 #define ASSERT_REG_POSITION(field_name, position)                                                  \
    static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4,                       \
                  "Field " #field_name " has invalid position")

+#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position)                                         \
+    static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4,               \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(upload, 0x60);
+ASSERT_REG_POSITION(exec_upload, 0x6C);
+ASSERT_REG_POSITION(data_upload, 0x6D);
 ASSERT_REG_POSITION(launch, 0xAF);
+ASSERT_REG_POSITION(tsc, 0x557);
+ASSERT_REG_POSITION(tic, 0x55D);
+ASSERT_REG_POSITION(code_loc, 0x582);
+ASSERT_REG_POSITION(texture_const_buffer_index, 0x982);
+ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8);
+ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC);
+ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11);
+ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12);
+ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14);
+ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D);

 #undef ASSERT_REG_POSITION

--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -14,9 +14,8 @@

 namespace Tegra::Engines {

-KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                           MemoryManager& memory_manager)
-    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
+KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
+    : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {}

 KeplerMemory::~KeplerMemory() = default;

@@ -28,46 +27,18 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {

    switch (method_call.method) {
    case KEPLERMEMORY_REG_INDEX(exec): {
-        ProcessExec();
+        upload_state.ProcessExec(regs.exec.linear != 0);
        break;
    }
    case KEPLERMEMORY_REG_INDEX(data): {
-        ProcessData(method_call.argument, method_call.IsLastCall());
+        const bool is_last_call = method_call.IsLastCall();
+        upload_state.ProcessData(method_call.argument, is_last_call);
+        if (is_last_call) {
+            system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+        }
        break;
    }
    }
 }

-void KeplerMemory::ProcessExec() {
-    state.write_offset = 0;
-    state.copy_size = regs.line_length_in * regs.line_count;
-    state.inner_buffer.resize(state.copy_size);
-}
-
-void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
-    const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
-    std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size);
-    state.write_offset += sub_copy_size;
-    if (is_last_call) {
-        const GPUVAddr address{regs.dest.Address()};
-        if (regs.exec.linear != 0) {
-            memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size);
-        } else {
-            UNIMPLEMENTED_IF(regs.dest.z != 0);
-            UNIMPLEMENTED_IF(regs.dest.depth != 1);
-            UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
-            UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
-            const std::size_t dst_size = Tegra::Texture::CalculateSize(
-                true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
-            std::vector<u8> tmp_buffer(dst_size);
-            memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
-            Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
-                                          regs.dest.y, regs.dest.BlockHeight(), state.copy_size,
-                                          state.inner_buffer.data(), tmp_buffer.data());
-            memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
-        }
-        system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
-    }
-}
-
 } // namespace Tegra::Engines
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -10,6 +10,7 @@
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
+#include "video_core/engines/engine_upload.h"
 #include "video_core/gpu.h"

 namespace Core {
@@ -20,19 +21,20 @@ namespace Tegra {
 class MemoryManager;
 }

-namespace VideoCore {
-class RasterizerInterface;
-}
-
 namespace Tegra::Engines {

+/**
+ * This Engine is known as P2MF. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h
+ */
+
 #define KEPLERMEMORY_REG_INDEX(field_name)                                                         \
    (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))

 class KeplerMemory final {
 public:
-    KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                 MemoryManager& memory_manager);
+    KeplerMemory(Core::System& system, MemoryManager& memory_manager);
    ~KeplerMemory();

    /// Write the value to the register identified by method.
@@ -45,42 +47,7 @@ public:
            struct {
                INSERT_PADDING_WORDS(0x60);

-                u32 line_length_in;
-                u32 line_count;
-
-                struct {
-                    u32 address_high;
-                    u32 address_low;
-                    u32 pitch;
-                    union {
-                        BitField<0, 4, u32> block_width;
-                        BitField<4, 4, u32> block_height;
-                        BitField<8, 4, u32> block_depth;
-                    };
-                    u32 width;
-                    u32 height;
-                    u32 depth;
-                    u32 z;
-                    u32 x;
-                    u32 y;
-
-                    GPUVAddr Address() const {
-                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
-                                                     address_low);
-                    }
-
-                    u32 BlockWidth() const {
-                        return 1U << block_width.Value();
-                    }
-
-                    u32 BlockHeight() const {
-                        return 1U << block_height.Value();
-                    }
-
-                    u32 BlockDepth() const {
-                        return 1U << block_depth.Value();
-                    }
-                } dest;
+                Upload::Registers upload;

                struct {
                    union {
@@ -96,28 +63,17 @@ public:
        };
    } regs{};

-    struct {
-        u32 write_offset = 0;
-        u32 copy_size = 0;
-        std::vector<u8> inner_buffer;
-    } state{};
-
 private:
    Core::System& system;
-    VideoCore::RasterizerInterface& rasterizer;
    MemoryManager& memory_manager;
-
-    void ProcessExec();
-    void ProcessData(u32 data, bool is_last_call);
+    Upload::State upload_state;
 };

 #define ASSERT_REG_POSITION(field_name, position)                                                  \
    static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4,                        \
                  "Field " #field_name " has invalid position")

-ASSERT_REG_POSITION(line_length_in, 0x60);
-ASSERT_REG_POSITION(line_count, 0x61);
-ASSERT_REG_POSITION(dest, 0x62);
+ASSERT_REG_POSITION(upload, 0x60);
 ASSERT_REG_POSITION(exec, 0x6C);
 ASSERT_REG_POSITION(data, 0x6D);
 #undef ASSERT_REG_POSITION
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -20,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00;

 Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                     MemoryManager& memory_manager)
-    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{
-                                                                                  *this} {
+    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
+      macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
    InitializeRegisterDefaults();
 }

@@ -253,6 +253,18 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
        ProcessSyncPoint();
        break;
    }
+    case MAXWELL3D_REG_INDEX(exec_upload): {
+        upload_state.ProcessExec(regs.exec_upload.linear != 0);
+        break;
+    }
+    case MAXWELL3D_REG_INDEX(data_upload): {
+        const bool is_last_call = method_call.IsLastCall();
+        upload_state.ProcessData(method_call.argument, is_last_call);
+        if (is_last_call) {
+            dirty_flags.OnMemoryWrite();
+        }
+        break;
+    }
    default:
        break;
    }
@@ -430,7 +442,7 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
    const auto a_type = tic_entry.a_type.Value();

    // TODO(Subv): Different data types for separate components are not supported
-    ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
+    DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);

    return tic_entry;
 }
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -14,6 +14,7 @@
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/math_util.h"
+#include "video_core/engines/engine_upload.h"
 #include "video_core/gpu.h"
 #include "video_core/macro_interpreter.h"
 #include "video_core/textures/texture.h"
@@ -32,6 +33,12 @@ class RasterizerInterface;

 namespace Tegra::Engines {

+/**
+ * This Engine is known as GF100_3D. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/gf100_3d.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
+ */
+
 #define MAXWELL3D_REG_INDEX(field_name)                                                            \
    (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))

@@ -580,7 +587,18 @@ public:
                    u32 bind;
                } macros;

-                INSERT_PADDING_WORDS(0x69);
+                INSERT_PADDING_WORDS(0x17);
+
+                Upload::Registers upload;
+                struct {
+                    union {
+                        BitField<0, 1, u32> linear;
+                    };
+                } exec_upload;
+
+                u32 data_upload;
+
+                INSERT_PADDING_WORDS(0x44);

                struct {
                    union {
@@ -1176,6 +1194,8 @@ private:
    /// Interpreter for the macro codes uploaded to the GPU.
    MacroInterpreter macro_interpreter;

+    Upload::State upload_state;
+
    /// Retrieves information about a specific TIC entry from the TIC buffer.
    Texture::TICEntry GetTICEntry(u32 tic_index) const;

@@ -1219,6 +1239,9 @@ private:
                  "Field " #field_name " has invalid position")

 ASSERT_REG_POSITION(macros, 0x45);
+ASSERT_REG_POSITION(upload, 0x60);
+ASSERT_REG_POSITION(exec_upload, 0x6C);
+ASSERT_REG_POSITION(data_upload, 0x6D);
 ASSERT_REG_POSITION(sync_info, 0xB2);
 ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
 ASSERT_REG_POSITION(rt, 0x200);
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -83,57 +83,66 @@ void MaxwellDMA::HandleCopy() {

    ASSERT(regs.exec.enable_2d == 1);

-    const std::size_t copy_size = regs.x_count * regs.y_count;
-
-    auto source_ptr{memory_manager.GetPointer(source)};
-    auto dst_ptr{memory_manager.GetPointer(dest)};
-
-    if (!source_ptr) {
-        LOG_ERROR(HW_GPU, "source_ptr is invalid");
-        return;
-    }
-
-    if (!dst_ptr) {
-        LOG_ERROR(HW_GPU, "dst_ptr is invalid");
-        return;
-    }
-
-    const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
-        // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
-        // copying.
-        rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
-
-        // We have to invalidate the destination region to evict any outdated surfaces from the
-        // cache. We do this before actually writing the new data because the destination address
-        // might contain a dirty surface that will have to be written back to memory.
-        rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
-    };
-
    if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
        ASSERT(regs.src_params.size_z == 1);
        // If the input is tiled and the output is linear, deswizzle the input and copy it over.
-
        const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
+        const std::size_t src_size = Texture::CalculateSize(
+            true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
+            regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());

-        FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y,
-                           copy_size * src_bytes_per_pixel);
+        const std::size_t dst_size = regs.dst_pitch * regs.y_count;
+
+        if (read_buffer.size() < src_size) {
+            read_buffer.resize(src_size);
+        }
+
+        if (write_buffer.size() < dst_size) {
+            write_buffer.resize(dst_size);
+        }
+
+        memory_manager.ReadBlock(source, read_buffer.data(), src_size);
+        memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);

        Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
-                                  regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
-                                  regs.src_params.BlockHeight(), regs.src_params.pos_x,
-                                  regs.src_params.pos_y);
+                                  regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(),
+                                  write_buffer.data(), regs.src_params.BlockHeight(),
+                                  regs.src_params.pos_x, regs.src_params.pos_y);
+
+        memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
    } else {
-        ASSERT(regs.dst_params.size_z == 1);
-        ASSERT(regs.src_pitch == regs.x_count);
+        ASSERT(regs.dst_params.BlockDepth() == 1);

-        const u32 src_bpp = regs.src_pitch / regs.x_count;
+        const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;

-        FlushAndInvalidate(regs.src_pitch * regs.y_count,
-                           regs.dst_params.size_x * regs.dst_params.size_y * src_bpp);
+        const std::size_t dst_size = Texture::CalculateSize(
+            true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
+            regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
+
+        const std::size_t dst_layer_size = Texture::CalculateSize(
+            true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
+            regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
+
+        const std::size_t src_size = regs.src_pitch * regs.y_count;
+
+        if (read_buffer.size() < src_size) {
+            read_buffer.resize(src_size);
+        }
+
+        if (write_buffer.size() < dst_size) {
+            write_buffer.resize(dst_size);
+        }
+
+        memory_manager.ReadBlock(source, read_buffer.data(), src_size);
+        memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);

        // If the input is linear and the output is tiled, swizzle the input and copy it over.
        Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
-                                src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
+                                src_bytes_per_pixel,
+                                write_buffer.data() + dst_layer_size * regs.dst_params.pos_z,
+                                read_buffer.data(), regs.dst_params.BlockHeight());
+
+        memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
    }
 }

--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -6,6 +6,7 @@

 #include <array>
 #include <cstddef>
+#include <vector>
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
@@ -25,6 +26,11 @@ class RasterizerInterface;

 namespace Tegra::Engines {

+/**
+ * This Engine is known as GK104_Copy. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
+ */
+
 class MaxwellDMA final {
 public:
    explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
@@ -63,6 +69,16 @@ public:

        static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");

+        enum class ComponentMode : u32 {
+            Src0 = 0,
+            Src1 = 1,
+            Src2 = 2,
+            Src3 = 3,
+            Const0 = 4,
+            Const1 = 5,
+            Zero = 6,
+        };
+
        enum class CopyMode : u32 {
            None = 0,
            Unk1 = 1,
@@ -128,7 +144,26 @@ public:
                u32 x_count;
                u32 y_count;

-                INSERT_PADDING_WORDS(0xBB);
+                INSERT_PADDING_WORDS(0xB8);
+
+                u32 const0;
+                u32 const1;
+                union {
+                    BitField<0, 4, ComponentMode> component0;
+                    BitField<4, 4, ComponentMode> component1;
+                    BitField<8, 4, ComponentMode> component2;
+                    BitField<12, 4, ComponentMode> component3;
+                    BitField<16, 2, u32> component_size;
+                    BitField<20, 3, u32> src_num_components;
+                    BitField<24, 3, u32> dst_num_components;
+
+                    u32 SrcBytePerPixel() const {
+                        return src_num_components.Value() * component_size.Value();
+                    }
+                    u32 DstBytePerPixel() const {
+                        return dst_num_components.Value() * component_size.Value();
+                    }
+                } swizzle_config;

                Parameters dst_params;

@@ -149,6 +184,9 @@ private:

    MemoryManager& memory_manager;

+    std::vector<u8> read_buffer;
+    std::vector<u8> write_buffer;
+
    /// Performs the copy from the source buffer to the destination buffer as configured in the
    /// registers.
    void HandleCopy();
@@ -165,6 +203,9 @@ ASSERT_REG_POSITION(src_pitch, 0x104);
 ASSERT_REG_POSITION(dst_pitch, 0x105);
 ASSERT_REG_POSITION(x_count, 0x106);
 ASSERT_REG_POSITION(y_count, 0x107);
+ASSERT_REG_POSITION(const0, 0x1C0);
+ASSERT_REG_POSITION(const1, 0x1C1);
+ASSERT_REG_POSITION(swizzle_config, 0x1C2);
 ASSERT_REG_POSITION(dst_params, 0x1C3);
 ASSERT_REG_POSITION(src_params, 0x1CA);

--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -35,9 +35,9 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren
    dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
    maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
    fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
-    kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
+    kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
    maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
-    kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager);
+    kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
 }

 GPU::~GPU() = default;
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -44,7 +44,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
                renderer.Rasterizer().FlushRegion(data->addr, data->size);
            } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
                renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
-            } else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) {
+            } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
                return;
            } else {
                UNREACHABLE();
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -25,6 +25,8 @@ MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : raste
    UpdatePageTableForVMA(initial_vma);
 }

+MemoryManager::~MemoryManager() = default;
+
 GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
    const u64 aligned_size{Common::AlignUp(size, page_size)};
    const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
@@ -199,11 +201,11 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
    return {};
 }

-bool MemoryManager::IsBlockContinous(const GPUVAddr start, const std::size_t size) {
+bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const {
    const GPUVAddr end = start + size;
    const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start));
    const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end));
-    const std::size_t range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
+    const auto range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
    return range == size;
 }

--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -47,7 +47,8 @@ struct VirtualMemoryArea {

 class MemoryManager final {
 public:
-    MemoryManager(VideoCore::RasterizerInterface& rasterizer);
+    explicit MemoryManager(VideoCore::RasterizerInterface& rasterizer);
+    ~MemoryManager();

    GPUVAddr AllocateSpace(u64 size, u64 align);
    GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
@@ -65,18 +66,18 @@ public:
    u8* GetPointer(GPUVAddr addr);
    const u8* GetPointer(GPUVAddr addr) const;

-    // Returns true if the block is continous in host memory, false otherwise
-    bool IsBlockContinous(const GPUVAddr start, const std::size_t size);
+    /// Returns true if the block is continuous in host memory, false otherwise
+    bool IsBlockContinuous(GPUVAddr start, std::size_t size) const;

    /**
     * ReadBlock and WriteBlock are full read and write operations over virtual
-     * GPU Memory. It's important to use these when GPU memory may not be continous
+     * GPU Memory. It's important to use these when GPU memory may not be continuous
     * in the Host Memory counterpart. Note: This functions cause Host GPU Memory
     * Flushes and Invalidations, respectively to each operation.
     */
-    void ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const;
-    void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size);
-    void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size);
+    void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
+    void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
+    void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);

    /**
     * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
@@ -88,9 +89,9 @@ public:
     * WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture
     * being flushed.
     */
-    void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const;
-    void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size);
-    void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size);
+    void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
+    void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
+    void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);

 private:
    using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
@@ -111,10 +112,10 @@ private:
    /**
     * Maps an unmanaged host memory pointer at a given address.
     *
-     * @param target The guest address to start the mapping at.
-     * @param memory The memory to be mapped.
-     * @param size Size of the mapping.
-     * @param state MemoryState tag to attach to the VMA.
+     * @param target       The guest address to start the mapping at.
+     * @param memory       The memory to be mapped.
+     * @param size         Size of the mapping in bytes.
+     * @param backing_addr The base address of the range to back this mapping.
     */
    VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr);

@@ -124,7 +125,7 @@ private:
    /// Converts a VMAHandle to a mutable VMAIter.
    VMAIter StripIterConstness(const VMAHandle& iter);

-    /// Marks as the specfied VMA as allocated.
+    /// Marks the specified VMA as allocated.
    VMAIter Allocate(VMAIter vma);

    /**
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -37,9 +37,6 @@ public:
    /// Gets the size of the shader in guest memory, required for cache management
    virtual std::size_t GetSizeInBytes() const = 0;

-    /// Wriets any cached resources back to memory
-    virtual void Flush() = 0;
-
    /// Sets whether the cached object should be considered registered
    void SetIsRegistered(bool registered) {
        is_registered = registered;
@@ -158,6 +155,8 @@ protected:
        return ++modified_ticks;
    }

+    virtual void FlushObjectInner(const T& object) = 0;
+
    /// Flushes the specified object, updating appropriate cache state as needed
    void FlushObject(const T& object) {
        std::lock_guard lock{mutex};
@@ -165,7 +164,7 @@ protected:
        if (!object->IsDirty()) {
            return;
        }
-        object->Flush();
+        FlushObjectInner(object);
        object->MarkAsModified(false, *this);
    }

--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -42,9 +42,6 @@ public:
        return alignment;
    }

-    // We do not have to flush this cache as things in it are never modified by us.
-    void Flush() override {}
-
 private:
    VAddr cpu_addr{};
    std::size_t size{};
@@ -75,6 +72,9 @@ public:
 protected:
    void AlignBuffer(std::size_t alignment);

+    // We do not have to flush this cache as things in it are never modified by us.
+    void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
+
 private:
    OGLStreamBuffer stream_buffer;

--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -46,7 +46,7 @@ public:
    /// Reloads the global region from guest memory
    void Reload(u32 size_);

-    void Flush() override;
+    void Flush();

 private:
    VAddr cpu_addr{};
@@ -65,6 +65,11 @@ public:
    GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
                                 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);

+protected:
+    void FlushObjectInner(const GlobalRegion& object) override {
+        object->Flush();
+    }
+
 private:
    GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
    GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size);
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -922,8 +922,8 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
        viewport.y = viewport_rect.bottom;
        viewport.width = viewport_rect.GetWidth();
        viewport.height = viewport_rect.GetHeight();
-        viewport.depth_range_far = regs.viewports[i].depth_range_far;
-        viewport.depth_range_near = regs.viewports[i].depth_range_near;
+        viewport.depth_range_far = src.depth_range_far;
+        viewport.depth_range_near = src.depth_range_near;
    }
    state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0;
    state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0;
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -628,9 +628,11 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
 }

 MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
-void CachedSurface::LoadGLBuffer() {
+void CachedSurface::LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) {
    MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
-    gl_buffer.resize(params.max_mip_level);
+    auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
+    if (gl_buffer.size() < params.max_mip_level)
+        gl_buffer.resize(params.max_mip_level);
    for (u32 i = 0; i < params.max_mip_level; i++)
        gl_buffer[i].resize(params.GetMipmapSizeGL(i));
    if (params.is_tiled) {
@@ -671,13 +673,13 @@ void CachedSurface::LoadGLBuffer() {
 }

 MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
-void CachedSurface::FlushGLBuffer() {
+void CachedSurface::FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) {
    MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);

    ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented");

+    auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
    // OpenGL temporary buffer needs to be big enough to store raw texture size
-    gl_buffer.resize(1);
    gl_buffer[0].resize(GetSizeInBytes());

    const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
@@ -713,10 +715,12 @@ void CachedSurface::FlushGLBuffer() {
    }
 }

-void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
-                                          GLuint draw_fb_handle) {
+void CachedSurface::UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
+                                          GLuint read_fb_handle, GLuint draw_fb_handle) {
    const auto& rect{params.GetRect(mip_map)};

+    auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
+
    // Load data from memory to the surface
    const auto x0 = static_cast<GLint>(rect.left);
    const auto y0 = static_cast<GLint>(rect.bottom);
@@ -801,7 +805,6 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
                                tuple.type, &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::TextureCubemap: {
-            std::size_t start = buffer_offset;
            for (std::size_t face = 0; face < params.depth; ++face) {
                glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face),
                                    static_cast<GLsizei>(rect.GetWidth()),
@@ -845,11 +848,12 @@ void CachedSurface::EnsureTextureDiscrepantView() {
 }

 MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
-void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
+void CachedSurface::UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem,
+                                    GLuint read_fb_handle, GLuint draw_fb_handle) {
    MICROPROFILE_SCOPE(OpenGL_TextureUL);

    for (u32 i = 0; i < params.max_mip_level; i++)
-        UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle);
+        UploadGLMipmapTexture(res_cache_tmp_mem, i, read_fb_handle, draw_fb_handle);
 }

 void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
@@ -929,8 +933,8 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
 }

 void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
-    surface->LoadGLBuffer();
-    surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
+    surface->LoadGLBuffer(temporal_memory);
+    surface->UploadGLTexture(temporal_memory, read_framebuffer.handle, draw_framebuffer.handle);
    surface->MarkAsModified(false, *this);
    surface->MarkForReload(false);
 }
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -355,6 +355,12 @@ namespace OpenGL {

 class RasterizerOpenGL;

+// This is used to store temporary big buffers,
+// instead of creating/destroying all the time
+struct RasterizerTemporaryMemory {
+    std::vector<std::vector<u8>> gl_buffer;
+};
+
 class CachedSurface final : public RasterizerCacheObject {
 public:
    explicit CachedSurface(const SurfaceParams& params);
@@ -371,10 +377,6 @@ public:
        return memory_size;
    }

-    void Flush() override {
-        FlushGLBuffer();
-    }
-
    const OGLTexture& Texture() const {
        return texture;
    }
@@ -397,11 +399,12 @@ public:
    }

    // Read/Write data in Switch memory to/from gl_buffer
-    void LoadGLBuffer();
-    void FlushGLBuffer();
+    void LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);
+    void FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);

    // Upload data in gl_buffer to this surface's texture
-    void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
+    void UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, GLuint read_fb_handle,
+                         GLuint draw_fb_handle);

    void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
                       Tegra::Texture::SwizzleSource swizzle_y,
@@ -429,13 +432,13 @@ public:
    }

 private:
-    void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
+    void UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
+                               GLuint read_fb_handle, GLuint draw_fb_handle);

    void EnsureTextureDiscrepantView();

    OGLTexture texture;
    OGLTexture discrepant_view;
-    std::vector<std::vector<u8>> gl_buffer;
    SurfaceParams params{};
    GLenum gl_target{};
    GLenum gl_internal_format{};
@@ -473,6 +476,11 @@ public:
    void SignalPreDrawCall();
    void SignalPostDrawCall();

+protected:
+    void FlushObjectInner(const Surface& object) override {
+        object->FlushGLBuffer(temporal_memory);
+    }
+
 private:
    void LoadSurface(const Surface& surface);
    Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true);
@@ -519,6 +527,8 @@ private:
    std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
    Surface last_depth_buffer;

+    RasterizerTemporaryMemory temporal_memory;
+
    using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
    using SurfaceInterval = typename SurfaceIntervalCache::interval_type;

--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -345,7 +345,7 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,

 ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
                                     const Device& device)
-    : RasterizerCache{rasterizer}, disk_cache{system}, device{device} {}
+    : RasterizerCache{rasterizer}, device{device}, disk_cache{system} {}

 void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
                                      const VideoCore::DiskResourceLoadCallback& callback) {
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -57,9 +57,6 @@ public:
        return shader_length;
    }

-    // We do not have to flush this cache as things in it are never modified by us.
-    void Flush() override {}
-
    /// Gets the shader entries for the shader
    const GLShader::ShaderEntries& GetShaderEntries() const {
        return entries;
@@ -123,6 +120,10 @@ public:
    /// Gets the current specified shader stage program
    Shader GetStageProgram(Maxwell::ShaderProgram program);

+protected:
+    // We do not have to flush this cache as things in it are never modified by us.
+    void FlushObjectInner(const Shader& object) override {}
+
 private:
    std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders(
        const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -871,17 +871,6 @@ private:
        return {};
    }

-    std::string Composite(Operation operation) {
-        std::string value = "vec4(";
-        for (std::size_t i = 0; i < 4; ++i) {
-            value += Visit(operation[i]);
-            if (i < 3)
-                value += ", ";
-        }
-        value += ')';
-        return value;
-    }
-
    template <Type type>
    std::string Add(Operation operation) {
        return GenerateBinaryInfix(operation, "+", type, type, type);
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -475,7 +475,10 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
    ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");

    auto& usages{it->second};
-    ASSERT(usages.find(usage) == usages.end());
+    if (usages.find(usage) != usages.end()) {
+        // Skip this variant since the shader is already stored.
+        return;
+    }
    usages.insert(usage);

    FileUtil::IOFile file = AppendTransferableFile();
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -49,9 +49,6 @@ public:
        return alignment;
    }

-    // We do not have to flush this cache as things in it are never modified by us.
-    void Flush() override {}
-
 private:
    VAddr cpu_addr{};
    std::size_t size{};
@@ -87,6 +84,10 @@ public:
        return buffer_handle;
    }

+protected:
+    // We do not have to flush this cache as things in it are never modified by us.
+    void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
+
 private:
    void AlignBuffer(std::size_t alignment);

--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -315,7 +315,6 @@ private:
        constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry",
                                                                         "overflow"};
        for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
-            const auto flag_code = static_cast<InternalFlag>(flag);
            const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
            internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
        }
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -540,8 +540,6 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
 Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
                            bool is_array, bool is_aoffi) {
    const std::size_t coord_count = GetCoordCount(texture_type);
-    const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
-    const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);

    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -25,8 +25,8 @@

 class InputBitStream {
 public:
-    explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0)
-        : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
+    explicit InputBitStream(const unsigned char* ptr, int start_offset = 0)
+        : m_CurByte(ptr), m_NextBit(start_offset % 8) {}

    ~InputBitStream() = default;

@@ -55,12 +55,9 @@ public:
    }

 private:
-    const int m_NumBits;
    const unsigned char* m_CurByte;
    int m_NextBit = 0;
    int m_BitsRead = 0;
-
-    bool done = false;
 };

 class OutputBitStream {
@@ -114,7 +111,6 @@ private:
    const int m_NumBits;
    unsigned char* m_CurByte;
    int m_NextBit = 0;
-    int m_BitsRead = 0;

    bool done = false;
 };
@@ -1616,6 +1612,7 @@ namespace Tegra::Texture::ASTC {
 std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
                                uint32_t depth, uint32_t block_width, uint32_t block_height) {
    uint32_t blockIdx = 0;
+    std::size_t depth_offset = 0;
    std::vector<uint8_t> outData(height * width * depth * 4);
    for (uint32_t k = 0; k < depth; k++) {
        for (uint32_t j = 0; j < height; j += block_height) {
@@ -1630,7 +1627,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he
                uint32_t decompWidth = std::min(block_width, width - i);
                uint32_t decompHeight = std::min(block_height, height - j);

-                uint8_t* outRow = outData.data() + (j * width + i) * 4;
+                uint8_t* outRow = depth_offset + outData.data() + (j * width + i) * 4;
                for (uint32_t jj = 0; jj < decompHeight; jj++) {
                    memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
                }
@@ -1638,6 +1635,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he
                blockIdx++;
            }
        }
+        depth_offset += height * width * 4;
    }

    return outData;
--- a/src/yuzu/compatdb.cpp
+++ b/src/yuzu/compatdb.cpp
@@ -58,7 +58,7 @@ void CompatDB::Submit() {

        button(NextButton)->setEnabled(false);
        button(NextButton)->setText(tr("Submitting"));
-        button(QWizard::CancelButton)->setVisible(false);
+        button(CancelButton)->setVisible(false);

        testcase_watcher.setFuture(QtConcurrent::run(
            [] { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); }));
@@ -74,12 +74,12 @@ void CompatDB::OnTestcaseSubmitted() {
                              tr("An error occured while sending the Testcase"));
        button(NextButton)->setEnabled(true);
        button(NextButton)->setText(tr("Next"));
-        button(QWizard::CancelButton)->setVisible(true);
+        button(CancelButton)->setVisible(true);
    } else {
        next();
        // older versions of QT don't support the "NoCancelButtonOnLastPage" option, this is a
        // workaround
-        button(QWizard::CancelButton)->setVisible(false);
+        button(CancelButton)->setVisible(false);
    }
 }

--- a/src/yuzu/configuration/configure_dialog.cpp
+++ b/src/yuzu/configuration/configure_dialog.cpp
@@ -17,8 +17,12 @@ ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry)
    ui->hotkeysTab->Populate(registry);
    this->setConfiguration();
    this->PopulateSelectionList();
+
+    setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint);
+
    connect(ui->selectorList, &QListWidget::itemSelectionChanged, this,
            &ConfigureDialog::UpdateVisibleTabs);
+
    adjustSize();
    ui->selectorList->setCurrentRow(0);

--- a/src/yuzu/hotkeys.h
+++ b/src/yuzu/hotkeys.h
@@ -67,8 +67,6 @@ public:

 private:
    struct Hotkey {
-        Hotkey() : shortcut(nullptr), context(Qt::WindowShortcut) {}
-
        QKeySequence keyseq;
        QShortcut* shortcut = nullptr;
        Qt::ShortcutContext context = Qt::WindowShortcut;
Author	SHA1	Message	Date
Fernando Sahmkow	fc975e9021	maxwell_3d: reduce severity of different component formats assert. This was reduced due to happening on most games and at such constant rate that it affected performance heavily for the end user. In general, we are well aware of the assert and an implementation is already planned.	2019-05-14 17:12:54 -04:00
Mat M	c4d549919f	Merge pull request #2462 from lioncash/video-mm video_core/memory_manager: Minor tidying	2019-05-14 06:40:33 -04:00
Mat M	dadcf317dc	Merge pull request #2461 from lioncash/unused-var video_core: Remove a few unused variables and functions	2019-05-14 06:36:26 -04:00
Mat M	8b933e77cd	Merge pull request #2460 from lioncash/volatile CMakeLists: Specify /volatile:iso for MSVC	2019-05-14 06:34:53 -04:00
Mat M	3e8e335a5c	Merge pull request #2450 from lioncash/warn-level CMakeLists: Explicitly specify -Wall for the non-MSVC case	2019-05-14 06:34:05 -04:00
Rodrigo Locatti	940a71089d	Merge pull request #2413 from FernandoS27/opt-gpu Rasterizer Cache: refactor flushing & optimize memory usage of surfaces	2019-05-13 23:01:59 -03:00
Lioncash	716fbaef74	video_core/memory_manager: Mark IsBlockContinuous() as a const member function Corrects the typo in its name and marks the function as a const member function, given it doesn't actually modify memory manager state.	2019-05-09 19:14:36 -04:00
Lioncash	d4bcd006b2	video_core/memory_manager: Mark the constructor as explicit Prevents implicit converting constructions of the memory manager.	2019-05-09 19:10:26 -04:00
Lioncash	fd12788967	video_core/memory_manager: Default the destructor within the cpp file Makes the class less surprising when it comes to forward declaring the type, and also prevents inlining the destruction code of the class, given it contains non-trivial types.	2019-05-09 19:10:13 -04:00
Lioncash	53afe47cec	video_core/memory_manager: Amend doxygen comments Corrects references to non-existent parameters and corrects typos.	2019-05-09 19:09:19 -04:00
Lioncash	5235b053b4	video_core/memory_manager: Remove superfluous const from function declarations These are able to be omitted from the declaration of functions, since they don't do anything at the type system level. The definitions of the functions can retain the use of const though, since they make the variables immutable in the implementation of the function where they're used.	2019-05-09 18:59:49 -04:00
Lioncash	b6408e9671	video_core/renderer_opengl/gl_shader_cache: Correct member initialization order Silences a -Wreorder warning.	2019-05-09 18:55:47 -04:00
Lioncash	e43ba3acd4	video_core/shader/decode/texture: Remove unused variable from GetTld4Code()	2019-05-09 18:49:56 -04:00
Lioncash	e3c45b4338	renderer_vulkan/vk_shader_decompiler: Remove unused variable from DeclareInternalFlags()	2019-05-09 18:47:48 -04:00
Lioncash	175fe8aaeb	video_core/renderer_opengl/gl_shader_decompiler: Remove unused Composite() function This isn't used at all, so it can be removed.	2019-05-09 18:45:26 -04:00
Lioncash	6d28d288a3	video_core/renderer_opengl/gl_rasterizer_cache: Remove unused variable in UploadGLMipmapTexture() This variable is unused entirely, so it can be removed.	2019-05-09 18:42:48 -04:00
Lioncash	ba165b1092	video_core/gpu_thread: Remove unused local variable Instead of retrieving the data from the std::variant instance, we can just check if the variant contains that type of data. This is essentially the same behavior, only it returns a bool indicating whether or not the type in the variant is currently active, instead of actually retrieving the data.	2019-05-09 18:39:21 -04:00
Lioncash	c56d893e77	video_core/textures/astc: Remove unused variables Silences a few compilation warnings.	2019-05-09 18:33:36 -04:00
Lioncash	c4d03f0154	CMakeLists: Specify /volatile:iso for MSVC By default, MSVC doesn't use standards-compliant volatile semantics. This makes it behave in a standards-compliant manner, making expectations more uniform across compilers.	2019-05-09 15:49:30 -04:00
bunnei	7cb17834c7	Merge pull request #2437 from lioncash/audctl service/audctl: Update documentation comments to be relative to 8.0.0	2019-05-09 13:24:13 -04:00
bunnei	f3317cf2db	Merge pull request #2454 from lioncash/cflag src/CMakeLists: Add /Zc:externConstexpr to the MSVC build flags	2019-05-09 13:23:49 -04:00
bunnei	daca045fcd	Merge pull request #2442 from FernandoS27/astc-fix Fix Layered ASTC Textures	2019-05-09 13:23:14 -04:00
bunnei	f69d3a6351	Merge pull request #2443 from ReinUsesLisp/skip-repeated-variants gl_shader_disk_cache: Skip stored shader variants instead of asserting	2019-05-09 13:22:42 -04:00
bunnei	5907619a04	Merge pull request #2445 from FearlessTobi/port-4749 Port citra-emu/citra#4749: "web_service: Misc fixes"	2019-05-09 13:22:00 -04:00
bunnei	9567b3a293	Merge pull request #2458 from lioncash/hotkey yuzu/hotkeys: Remove unnecessary constructor	2019-05-09 13:21:36 -04:00
bunnei	c6f3831320	Merge pull request #2456 from lioncash/qualifier yuzu/compatdb: Remove unnecessary qualifiers	2019-05-09 13:21:04 -04:00
bunnei	8abf0add04	Merge pull request #2459 from lioncash/what configure_dialog: Remove the Whats This? button from the dialog	2019-05-09 13:20:12 -04:00
bunnei	5b6571c170	Merge pull request #2453 from lioncash/enum core/memory: Remove unused FlushMode enum	2019-05-09 13:19:49 -04:00
bunnei	c27b81cb85	Merge pull request #2429 from FernandoS27/compute Corrections and Implementation on GPU Engines	2019-05-09 13:19:22 -04:00
bunnei	0e9a17b029	Merge pull request #2440 from lioncash/dynarmic externals: Update dynarmic to master	2019-05-09 13:18:49 -04:00
Lioncash	f3c18d622e	configure_dialog: Remove the Whats This? button from the dialog	2019-05-09 03:20:13 -04:00
Lioncash	8bdef4f951	yuzu/hotkeys: Remove unnecessary constructor The behavior of the Hotkey constructor is already accomplished via in-class member initializers, so the constructor is superfluous here.	2019-05-09 02:17:22 -04:00
Lioncash	a97120efc1	yuzu/compatdb: Remove unnecessary qualifiers Keeps the code consistent in regards to how the buttons are referred to.	2019-05-09 01:08:06 -04:00
Lioncash	70c6506a7e	src/CMakeLists: Add /Zc:externConstexpr to the MSVC build flags The C++ standard allows constexpr variables declared with the extern keyword to have external linkage. Previously MSVC wasn't abiding by this. This just makes the compiler more standards compliant during builds. Given we currently don't make use of anything that would break by this, this is safe to enable.	2019-05-07 14:06:22 -04:00
Lioncash	6ca7241bd9	src/CMakeLists: Vertically order compilation flags Makes it much nicer to visually scan the options. This also starts the flag descriptions from the same column for the same reason.	2019-05-07 14:05:48 -04:00
Lioncash	495a8d8d95	core/memory: Remove unused FlushMode enum Recent changes to memory-related code resulted in this being unused, so we can remove it.	2019-05-07 13:55:17 -04:00
Lioncash	0964444529	externals: Update dynarmic to master Better instruction support has been added since the last update.	2019-05-07 10:39:25 -04:00
Merry	c63e68c480	Merge pull request #2451 from lioncash/travis travis: Update to using Xcode 10.2	2019-05-07 15:36:13 +01:00
Lioncash	4aefd45193	travis: Update to using Xcode 10.2 Keeps the CI toolchain updated. This is also necessary for updating dynarmic.	2019-05-07 06:40:30 -04:00
Rodrigo Locatti	6743982d28	Merge pull request #2447 from lioncash/dtor core/frontend/emu_window: Make GraphicsContext's destructor virtual	2019-05-07 05:54:04 -03:00
Rodrigo Locatti	57db3f6763	Merge pull request #2448 from lioncash/pragma common/zstd_compression: Remove #pragma once directive from source file	2019-05-07 05:51:37 -03:00
Rodrigo Locatti	a206418846	Merge pull request #2449 from lioncash/unused-var gl_rasterizer: Silence unused variable warnings	2019-05-07 05:50:59 -03:00
zhupengfei	10c4f23953	core/telemetry_session: Only create the backend when we really need it The backend is not used until we decide to submit the testcase/telemetry, and creating it early prevents users from updating the credentials properly while the games are running.	2019-05-04 19:45:48 +02:00
Lioncash	9e15193ef8	shader/decode/texture: Remove unused variable This isn't used anywhere, so we can get rid of it.	2019-05-04 02:10:38 -04:00
Lioncash	5d0dca73c6	CMakeLists: Explicitly specify -Wall for the non-MSVC case Ensures that -Wall is always active as a compilation flag.	2019-05-04 02:06:56 -04:00
Lioncash	08b270676b	gl_rasterizer: Silence unused variable warning Makes use of src, so it's not considered unused.	2019-05-04 02:00:17 -04:00
Lioncash	1230a0e7ce	core/frontend/emu_window: Make GraphicsContext's destructor virtual This class is used in a polymorphic context, so destruction of the context will lead to undefined behavior if the destructor isn't virtual.	2019-05-04 01:47:38 -04:00
Fernando Sahmkow	e64c41efe8	Refactors and name corrections.	2019-05-01 15:31:39 -04:00
ReinUsesLisp	4aa081b4e7	gl_shader_disk_cache: Skip stored shader variants instead of asserting Instead of asserting on already stored shader variants, silently skip them. This shouldn't be happening but when a shader is invalidated and it is not stored in the shader cache, this assert would hit and save that shader anyways when the asserts are disabled.	2019-05-01 00:36:11 -03:00
Fernando Sahmkow	95261639fb	Fix Layered ASTC Textures By adding the missing layer offset in ASTC compression.	2019-04-30 23:02:31 -04:00
Lioncash	565fce71b1	service/audctl: Update documentation comments to be relative to 8.0.0 The state of these service calls are still the same in version 8.0.0.	2019-04-27 23:17:58 -04:00
Fernando Sahmkow	b3118ee316	Fixes and Corrections to DMA Engine	2019-04-23 15:28:18 -04:00
Fernando Sahmkow	f1e5314f1a	Add Swizzle Parameters to the DMA engine	2019-04-23 11:21:00 -04:00
Fernando Sahmkow	e140e2ebc6	Add Documentation Headers to all the GPU Engines	2019-04-23 08:44:52 -04:00
Fernando Sahmkow	021d28c9b8	Corrections and styling	2019-04-23 08:02:24 -04:00
Fernando Sahmkow	701ce1c9d0	Implement Maxwell3D Data Upload	2019-04-22 19:27:36 -04:00
Fernando Sahmkow	e4ff140b99	Introduce skeleton of the GPU Compute Engine.	2019-04-22 19:05:43 -04:00
Fernando Sahmkow	a91d3fc639	Revamp Kepler Memory to use a subengine to manage uploads	2019-04-22 18:50:56 -04:00
Fernando Sahmkow	4c36b78567	Rasterizer Cache: Use a temporal storage for Surfaces loading/flushing. This PR should heavily reduce memory usage since temporal buffers are no longer stored per Surface but instead managed by the Rasterizer Cache.	2019-04-21 11:42:07 -04:00
Fernando Sahmkow	a3eb91ed8c	RasterizerCache Redesign: Flush flushing is now responsibility of children caches instead of the cache object. This change will allow the specific cache to pass extra parameters on flushing and will allow more flexibility.	2019-04-19 20:44:56 -04:00