gl_rasterizer: Pass the right number of array quad vertices count

Merge pull request #2477 from ReinUsesLisp/fix-sdl2
yuzu_cmd: Make OpenGL's context current
2019-05-17 17:08:34 -03:00 · 2019-05-17 13:04:33 -04:00 · 2019-05-17 13:04:04 -04:00 · 2019-05-17 13:01:41 -04:00 · 2019-05-17 04:29:20 -03:00 · 2019-05-17 04:25:26 -03:00
70 changed files with 952 additions and 523 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,7 +24,7 @@ matrix:
    - os: osx
      env: NAME="macos build"
      sudo: false
-      osx_image: xcode10.1
+      osx_image: xcode10.2
      install: "./.travis/macos/deps.sh"
      script: "./.travis/macos/build.sh"
      after_success: "./.travis/macos/upload.sh"
--- a/.travis/linux-mingw/build.sh
+++ b/.travis/linux-mingw/build.sh
@@ -1,3 +1,3 @@
 #!/bin/bash -ex
 mkdir "$HOME/.ccache" || true
-docker run --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache ubuntu:18.04 /bin/bash -ex /yuzu/.travis/linux-mingw/docker.sh
+docker run --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache yuzuemu/build-environments:linux-mingw /bin/bash -ex /yuzu/.travis/linux-mingw/docker.sh
--- a/.travis/linux-mingw/deps.sh
+++ b/.travis/linux-mingw/deps.sh
@@ -1,3 +1,3 @@
 #!/bin/sh -ex

-docker pull ubuntu:18.04
+docker pull yuzuemu/build-environments:linux-mingw
--- a/.travis/linux-mingw/docker.sh
+++ b/.travis/linux-mingw/docker.sh
@@ -1,16 +1,6 @@
 #!/bin/bash -ex

 cd /yuzu
-MINGW_PACKAGES="sdl2-mingw-w64 qt5base-mingw-w64 qt5tools-mingw-w64 libsamplerate-mingw-w64 qt5multimedia-mingw-w64"
-apt-get update
-apt-get install -y gpg wget git python3-pip python ccache g++-mingw-w64-x86-64 gcc-mingw-w64-x86-64 mingw-w64-tools cmake
-echo 'deb http://ppa.launchpad.net/tobydox/mingw-w64/ubuntu bionic main ' > /etc/apt/sources.list.d/extras.list
-apt-key adv --keyserver keyserver.ubuntu.com --recv '72931B477E22FEFD47F8DECE02FE5F12ADDE29B2'
-apt-get update
-apt-get install -y ${MINGW_PACKAGES}
-
-# fix a problem in current MinGW headers
-wget -q https://raw.githubusercontent.com/Alexpux/mingw-w64/d0d7f784833bbb0b2d279310ddc6afb52fe47a46/mingw-w64-headers/crt/errno.h -O /usr/x86_64-w64-mingw32/include/errno.h
 # override Travis CI unreasonable ccache size
 echo 'max_size = 3.0G' > "$HOME/.ccache/ccache.conf"

@@ -23,8 +13,8 @@ echo '' >> /bin/cmd
 chmod +x /bin/cmd

 mkdir build && cd build
-cmake .. -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release
-make -j4
+cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release
+ninja

 # Clean up the dirty hacks
 rm /bin/uname && mv /bin/uname1 /bin/uname
--- a/.travis/linux/build.sh
+++ b/.travis/linux/build.sh
@@ -1,4 +1,4 @@
 #!/bin/bash -ex

 mkdir -p "$HOME/.ccache"
-docker run -e ENABLE_COMPATIBILITY_REPORTING --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache ubuntu:18.04 /bin/bash /yuzu/.travis/linux/docker.sh
+docker run -e ENABLE_COMPATIBILITY_REPORTING --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache yuzuemu/build-environments:linux-fresh /bin/bash /yuzu/.travis/linux/docker.sh
--- a/.travis/linux/deps.sh
+++ b/.travis/linux/deps.sh
@@ -1,3 +1,3 @@
 #!/bin/sh -ex

-docker pull ubuntu:18.04
+docker pull yuzuemu/build-environments:linux-fresh
--- a/.travis/linux/docker.sh
+++ b/.travis/linux/docker.sh
@@ -1,12 +1,9 @@
 #!/bin/bash -ex

-apt-get update
-apt-get install --no-install-recommends -y build-essential git libqt5opengl5-dev libsdl2-dev libssl-dev python qtbase5-dev qtwebengine5-dev wget cmake ninja-build ccache
-
 cd /yuzu

 mkdir build && cd build
-cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON -G Ninja
+cmake .. -G Ninja -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON
 ninja

 ccache -s
--- a/.travis/macos/build.sh
+++ b/.travis/macos/build.sh
@@ -7,6 +7,7 @@ export Qt5_DIR=$(brew --prefix)/opt/qt5
 export UNICORNDIR=$(pwd)/externals/unicorn
 export PATH="/usr/local/opt/ccache/libexec:$PATH"

+# TODO: Build using ninja instead of make
 mkdir build && cd build
 cmake --version
 cmake .. -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DUSE_DISCORD_PRESENCE=ON
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -19,7 +19,7 @@ set(BUILD_VERSION "0")
 if (BUILD_REPOSITORY)
  # regex capture the string nightly or canary into CMAKE_MATCH_1
  string(REGEX MATCH "yuzu-emu/yuzu-?(.*)" OUTVAR ${BUILD_REPOSITORY})
-  if (${CMAKE_MATCH_COUNT} GREATER 0)
+  if ("${CMAKE_MATCH_COUNT}" GREATER 0)
    # capitalize the first letter of each word in the repo name.
    string(REPLACE "-" ";" REPO_NAME_LIST ${CMAKE_MATCH_1})
    foreach(WORD ${REPO_NAME_LIST})
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -7,6 +7,10 @@ include(DownloadExternals)
 add_library(catch-single-include INTERFACE)
 target_include_directories(catch-single-include INTERFACE catch/single_include)

+# libfmt
+add_subdirectory(fmt)
+add_library(fmt::fmt ALIAS fmt)
+
 # Dynarmic
 if (ARCHITECTURE_x86_64)
    set(DYNARMIC_TESTS OFF)
@@ -14,10 +18,6 @@ if (ARCHITECTURE_x86_64)
    add_subdirectory(dynarmic)
 endif()

-# libfmt
-add_subdirectory(fmt)
-add_library(fmt::fmt ALIAS fmt)
-
 # getopt
 if (MSVC)
    add_subdirectory(getopt)
--- a/externals/dynarmic
+++ b/externals/dynarmic
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -21,15 +21,29 @@ if (MSVC)
    # Ensure that projects build with Unicode support.
    add_definitions(-DUNICODE -D_UNICODE)

-    # /W3 - Level 3 warnings
-    # /MP - Multi-threaded compilation
-    # /Zi - Output debugging information
-    # /Zo - enhanced debug info for optimized builds
-    # /permissive- - enables stricter C++ standards conformance checks
-    # /EHsc - C++-only exception handling semantics
-    # /Zc:throwingNew - let codegen assume `operator new` will never return null
-    # /Zc:inline - let codegen omit inline functions in object files
-    add_compile_options(/W3 /MP /Zi /Zo /permissive- /EHsc /std:c++latest /Zc:throwingNew,inline)
+    # /W3                 - Level 3 warnings
+    # /MP                 - Multi-threaded compilation
+    # /Zi                 - Output debugging information
+    # /Zo                 - Enhanced debug info for optimized builds
+    # /permissive-        - Enables stricter C++ standards conformance checks
+    # /EHsc               - C++-only exception handling semantics
+    # /volatile:iso       - Use strict standards-compliant volatile semantics.
+    # /Zc:externConstexpr - Allow extern constexpr variables to have external linkage, like the standard mandates
+    # /Zc:inline          - Let codegen omit inline functions in object files
+    # /Zc:throwingNew     - Let codegen assume `operator new` (without std::nothrow) will never return null
+    add_compile_options(
+        /W3
+        /MP
+        /Zi
+        /Zo
+        /permissive-
+        /EHsc
+        /std:c++latest
+        /volatile:iso
+        /Zc:externConstexpr
+        /Zc:inline
+        /Zc:throwingNew
+    )

    # /GS- - No stack buffer overflow checks
    add_compile_options("$<$<CONFIG:Release>:/GS->")
@@ -37,7 +51,10 @@ if (MSVC)
    set(CMAKE_EXE_LINKER_FLAGS_DEBUG   "/DEBUG /MANIFEST:NO" CACHE STRING "" FORCE)
    set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE)
 else()
-    add_compile_options("-Wno-attributes")
+    add_compile_options(
+        -Wall
+        -Wno-attributes
+    )

    if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang)
        add_compile_options("-stdlib=libc++")
--- a/src/common/zstd_compression.cpp
+++ b/src/common/zstd_compression.cpp
@@ -2,8 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#pragma once
-
 #include <algorithm>
 #include <zstd.h>

--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -10,6 +10,8 @@

 namespace Core::Frontend {

+GraphicsContext::~GraphicsContext() = default;
+
 class EmuWindow::TouchState : public Input::Factory<Input::TouchDevice>,
                              public std::enable_shared_from_this<TouchState> {
 public:
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -19,6 +19,8 @@ namespace Core::Frontend {
 */
 class GraphicsContext {
 public:
+    virtual ~GraphicsContext();
+
    /// Makes the graphics context current for the caller thread
    virtual void MakeCurrent() = 0;

--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -241,7 +241,8 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
 }

 Process::Process(Core::System& system)
-    : WaitObject{system.Kernel()}, address_arbiter{system}, mutex{system}, system{system} {}
+    : WaitObject{system.Kernel()}, vm_manager{system},
+      address_arbiter{system}, mutex{system}, system{system} {}

 Process::~Process() = default;

--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -62,7 +62,7 @@ bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
    return true;
 }

-VMManager::VMManager() {
+VMManager::VMManager(Core::System& system) : system{system} {
    // Default to assuming a 39-bit address space. This way we have a sane
    // starting point with executables that don't provide metadata.
    Reset(FileSys::ProgramAddressSpaceType::Is39Bit);
@@ -111,7 +111,6 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
    VirtualMemoryArea& final_vma = vma_handle->second;
    ASSERT(final_vma.size == size);

-    auto& system = Core::System::GetInstance();
    system.ArmInterface(0).MapBackingMemory(target, size, block->data() + offset,
                                            VMAPermission::ReadWriteExecute);
    system.ArmInterface(1).MapBackingMemory(target, size, block->data() + offset,
@@ -140,7 +139,6 @@ ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* me
    VirtualMemoryArea& final_vma = vma_handle->second;
    ASSERT(final_vma.size == size);

-    auto& system = Core::System::GetInstance();
    system.ArmInterface(0).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
    system.ArmInterface(1).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
    system.ArmInterface(2).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
@@ -223,7 +221,6 @@ ResultCode VMManager::UnmapRange(VAddr target, u64 size) {

    ASSERT(FindVMA(target)->second.size >= size);

-    auto& system = Core::System::GetInstance();
    system.ArmInterface(0).UnmapMemory(target, size);
    system.ArmInterface(1).UnmapMemory(target, size);
    system.ArmInterface(2).UnmapMemory(target, size);
@@ -376,7 +373,7 @@ ResultCode VMManager::UnmapCodeMemory(VAddr dst_address, VAddr src_address, u64
    Reprotect(src_vma_iter, VMAPermission::ReadWrite);

    if (dst_memory_state == MemoryState::ModuleCode) {
-        Core::System::GetInstance().InvalidateCpuInstructionCaches();
+        system.InvalidateCpuInstructionCaches();
    }

    return unmap_result;
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -14,6 +14,10 @@
 #include "core/hle/result.h"
 #include "core/memory.h"

+namespace Core {
+class System;
+}
+
 namespace FileSys {
 enum class ProgramAddressSpaceType : u8;
 }
@@ -321,7 +325,7 @@ class VMManager final {
 public:
    using VMAHandle = VMAMap::const_iterator;

-    VMManager();
+    explicit VMManager(Core::System& system);
    ~VMManager();

    /// Clears the address space map, re-initializing with a single free area.
@@ -712,5 +716,7 @@ private:
    // The end of the currently allocated heap. This is not an inclusive
    // end of the range. This is essentially 'base_address + current_size'.
    VAddr heap_end = 0;
+
+    Core::System& system;
 };
 } // namespace Kernel
--- a/src/core/hle/service/audio/audctl.cpp
+++ b/src/core/hle/service/audio/audctl.cpp
@@ -50,7 +50,7 @@ void AudCtl::GetTargetVolumeMin(Kernel::HLERequestContext& ctx) {
    LOG_DEBUG(Audio, "called.");

    // This service function is currently hardcoded on the
-    // actual console to this value (as of 6.0.0).
+    // actual console to this value (as of 8.0.0).
    constexpr s32 target_min_volume = 0;

    IPC::ResponseBuilder rb{ctx, 3};
@@ -62,7 +62,7 @@ void AudCtl::GetTargetVolumeMax(Kernel::HLERequestContext& ctx) {
    LOG_DEBUG(Audio, "called.");

    // This service function is currently hardcoded on the
-    // actual console to this value (as of 6.0.0).
+    // actual console to this value (as of 8.0.0).
    constexpr s32 target_max_volume = 15;

    IPC::ResponseBuilder rb{ctx, 3};
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -21,8 +21,6 @@
 #include "core/memory.h"
 #include "core/settings.h"

-#pragma optimize("", off)
-
 namespace Loader {
 namespace {
 struct MODHeader {
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -72,15 +72,6 @@ u8* GetPointer(VAddr vaddr);

 std::string ReadCString(VAddr vaddr, std::size_t max_length);

-enum class FlushMode {
-    /// Write back modified surfaces to RAM
-    Flush,
-    /// Remove region from the cache
-    Invalidate,
-    /// Write back modified surfaces to RAM, and also remove them from the cache
-    FlushAndInvalidate,
-};
-
 /**
 * Mark each page touching the region as cached.
 */
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -102,12 +102,6 @@ bool VerifyLogin(const std::string& username, const std::string& token) {
 }

 TelemetrySession::TelemetrySession() {
-#ifdef ENABLE_WEB_SERVICE
-    backend = std::make_unique<WebService::TelemetryJson>(
-        Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
-#else
-    backend = std::make_unique<Telemetry::NullVisitor>();
-#endif
    // Log one-time top-level information
    AddField(Telemetry::FieldType::None, "TelemetryId", GetTelemetryId());

@@ -175,9 +169,14 @@ TelemetrySession::~TelemetrySession() {
                                .count()};
    AddField(Telemetry::FieldType::Session, "Shutdown_Time", shutdown_time);

+#ifdef ENABLE_WEB_SERVICE
+    auto backend = std::make_unique<WebService::TelemetryJson>(
+        Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
+#else
+    auto backend = std::make_unique<Telemetry::NullVisitor>();
+#endif
+
    // Complete the session, submitting to web service if necessary
-    // This is just a placeholder to wrap up the session once the core completes and this is
-    // destroyed. This will be moved elsewhere once we are actually doing real I/O with the service.
    field_collection.Accept(*backend);
    if (Settings::values.enable_telemetry)
        backend->Complete();
@@ -186,6 +185,8 @@ TelemetrySession::~TelemetrySession() {

 bool TelemetrySession::SubmitTestcase() {
 #ifdef ENABLE_WEB_SERVICE
+    auto backend = std::make_unique<WebService::TelemetryJson>(
+        Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token);
    field_collection.Accept(*backend);
    return backend->SubmitTestcase();
 #else
--- a/src/core/telemetry_session.h
+++ b/src/core/telemetry_session.h
@@ -39,7 +39,6 @@ public:

 private:
    Telemetry::FieldCollection field_collection; ///< Tracks all added fields for the session
-    std::unique_ptr<Telemetry::VisitorInterface> backend; ///< Backend interface that logs fields
 };

 /**
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -3,6 +3,8 @@ add_library(video_core STATIC
    dma_pusher.h
    debug_utils/debug_utils.cpp
    debug_utils/debug_utils.h
+    engines/engine_upload.cpp
+    engines/engine_upload.h
    engines/fermi_2d.cpp
    engines/fermi_2d.h
    engines/kepler_compute.cpp
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -105,6 +105,8 @@ bool DmaPusher::Step() {
                dma_state.non_incrementing = false;
                dma_increment_once = true;
                break;
+            default:
+                break;
            }
        }
    }
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -0,0 +1,48 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "video_core/engines/engine_upload.h"
+#include "video_core/memory_manager.h"
+#include "video_core/textures/decoders.h"
+
+namespace Tegra::Engines::Upload {
+
+State::State(MemoryManager& memory_manager, Registers& regs)
+    : memory_manager(memory_manager), regs(regs) {}
+
+void State::ProcessExec(const bool is_linear) {
+    write_offset = 0;
+    copy_size = regs.line_length_in * regs.line_count;
+    inner_buffer.resize(copy_size);
+    this->is_linear = is_linear;
+}
+
+void State::ProcessData(const u32 data, const bool is_last_call) {
+    const u32 sub_copy_size = std::min(4U, copy_size - write_offset);
+    std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size);
+    write_offset += sub_copy_size;
+    if (!is_last_call) {
+        return;
+    }
+    const GPUVAddr address{regs.dest.Address()};
+    if (is_linear) {
+        memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
+    } else {
+        UNIMPLEMENTED_IF(regs.dest.z != 0);
+        UNIMPLEMENTED_IF(regs.dest.depth != 1);
+        UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
+        UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
+        const std::size_t dst_size = Tegra::Texture::CalculateSize(
+            true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
+        tmp_buffer.resize(dst_size);
+        memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
+        Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y,
+                                      regs.dest.BlockHeight(), copy_size, inner_buffer.data(),
+                                      tmp_buffer.data());
+        memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
+    }
+}
+
+} // namespace Tegra::Engines::Upload
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -0,0 +1,75 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <vector>
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace Tegra::Engines::Upload {
+
+struct Registers {
+    u32 line_length_in;
+    u32 line_count;
+
+    struct {
+        u32 address_high;
+        u32 address_low;
+        u32 pitch;
+        union {
+            BitField<0, 4, u32> block_width;
+            BitField<4, 4, u32> block_height;
+            BitField<8, 4, u32> block_depth;
+        };
+        u32 width;
+        u32 height;
+        u32 depth;
+        u32 z;
+        u32 x;
+        u32 y;
+
+        GPUVAddr Address() const {
+            return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
+        }
+
+        u32 BlockWidth() const {
+            return 1U << block_width.Value();
+        }
+
+        u32 BlockHeight() const {
+            return 1U << block_height.Value();
+        }
+
+        u32 BlockDepth() const {
+            return 1U << block_depth.Value();
+        }
+    } dest;
+};
+
+class State {
+public:
+    State(MemoryManager& memory_manager, Registers& regs);
+    ~State() = default;
+
+    void ProcessExec(const bool is_linear);
+    void ProcessData(const u32 data, const bool is_last_call);
+
+private:
+    u32 write_offset = 0;
+    u32 copy_size = 0;
+    std::vector<u8> inner_buffer;
+    std::vector<u8> tmp_buffer;
+    bool is_linear = false;
+    Registers& regs;
+    MemoryManager& memory_manager;
+};
+
+} // namespace Tegra::Engines::Upload
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -21,6 +21,12 @@ class RasterizerInterface;

 namespace Tegra::Engines {

+/**
+ * This Engine is known as G80_2D. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
+ */
+
 #define FERMI2D_REG_INDEX(field_name)                                                              \
    (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))

--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -4,12 +4,21 @@

 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"
+#include "video_core/textures/decoders.h"

 namespace Tegra::Engines {

-KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {}
+KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                             MemoryManager& memory_manager)
+    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{
+                                                                                  memory_manager,
+                                                                                  regs.upload} {}

 KeplerCompute::~KeplerCompute() = default;

@@ -20,14 +29,34 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
    regs.reg_array[method_call.method] = method_call.argument;

    switch (method_call.method) {
+    case KEPLER_COMPUTE_REG_INDEX(exec_upload): {
+        upload_state.ProcessExec(regs.exec_upload.linear != 0);
+        break;
+    }
+    case KEPLER_COMPUTE_REG_INDEX(data_upload): {
+        const bool is_last_call = method_call.IsLastCall();
+        upload_state.ProcessData(method_call.argument, is_last_call);
+        if (is_last_call) {
+            system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+        }
+        break;
+    }
    case KEPLER_COMPUTE_REG_INDEX(launch):
-        // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA
-        // kernels)
-        UNREACHABLE_MSG("Compute shaders are not implemented");
+        ProcessLaunch();
        break;
    default:
        break;
    }
 }

+void KeplerCompute::ProcessLaunch() {
+
+    const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
+    memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
+                                   LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
+
+    const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start;
+    LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc);
+}
+
 } // namespace Tegra::Engines
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -6,22 +6,40 @@

 #include <array>
 #include <cstddef>
+#include <vector>
+#include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
+#include "video_core/engines/engine_upload.h"
 #include "video_core/gpu.h"

+namespace Core {
+class System;
+}
+
 namespace Tegra {
 class MemoryManager;
 }

+namespace VideoCore {
+class RasterizerInterface;
+}
+
 namespace Tegra::Engines {

+/**
+ * This Engine is known as GK104_Compute. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h
+ */
+
 #define KEPLER_COMPUTE_REG_INDEX(field_name)                                                       \
    (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))

 class KeplerCompute final {
 public:
-    explicit KeplerCompute(MemoryManager& memory_manager);
+    explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                           MemoryManager& memory_manager);
    ~KeplerCompute();

    static constexpr std::size_t NumConstBuffers = 8;
@@ -31,30 +49,181 @@ public:

        union {
            struct {
-                INSERT_PADDING_WORDS(0xAF);
+                INSERT_PADDING_WORDS(0x60);
+
+                Upload::Registers upload;
+
+                struct {
+                    union {
+                        BitField<0, 1, u32> linear;
+                    };
+                } exec_upload;
+
+                u32 data_upload;
+
+                INSERT_PADDING_WORDS(0x3F);
+
+                struct {
+                    u32 address;
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8));
+                    }
+                } launch_desc_loc;
+
+                INSERT_PADDING_WORDS(0x1);

                u32 launch;

-                INSERT_PADDING_WORDS(0xC48);
+                INSERT_PADDING_WORDS(0x4A7);
+
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+                    u32 limit;
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } tsc;
+
+                INSERT_PADDING_WORDS(0x3);
+
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+                    u32 limit;
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } tic;
+
+                INSERT_PADDING_WORDS(0x22);
+
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+                    GPUVAddr Address() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } code_loc;
+
+                INSERT_PADDING_WORDS(0x3FE);
+
+                u32 texture_const_buffer_index;
+
+                INSERT_PADDING_WORDS(0x374);
            };
            std::array<u32, NUM_REGS> reg_array;
        };
    } regs{};
+
+    struct LaunchParams {
+        static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40;
+
+        INSERT_PADDING_WORDS(0x8);
+
+        u32 program_start;
+
+        INSERT_PADDING_WORDS(0x2);
+
+        BitField<30, 1, u32> linked_tsc;
+
+        BitField<0, 31, u32> grid_dim_x;
+        union {
+            BitField<0, 16, u32> grid_dim_y;
+            BitField<16, 16, u32> grid_dim_z;
+        };
+
+        INSERT_PADDING_WORDS(0x3);
+
+        BitField<0, 16, u32> shared_alloc;
+
+        BitField<0, 31, u32> block_dim_x;
+        union {
+            BitField<0, 16, u32> block_dim_y;
+            BitField<16, 16, u32> block_dim_z;
+        };
+
+        union {
+            BitField<0, 8, u32> const_buffer_enable_mask;
+            BitField<29, 2, u32> cache_layout;
+        } memory_config;
+
+        INSERT_PADDING_WORDS(0x8);
+
+        struct {
+            u32 address_low;
+            union {
+                BitField<0, 8, u32> address_high;
+                BitField<15, 17, u32> size;
+            };
+            GPUVAddr Address() const {
+                return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) |
+                                             address_low);
+            }
+        } const_buffer_config[8];
+
+        union {
+            BitField<0, 20, u32> local_pos_alloc;
+            BitField<27, 5, u32> barrier_alloc;
+        };
+
+        union {
+            BitField<0, 20, u32> local_neg_alloc;
+            BitField<24, 5, u32> gpr_alloc;
+        };
+
+        INSERT_PADDING_WORDS(0x11);
+    } launch_description;
+
+    struct {
+        u32 write_offset = 0;
+        u32 copy_size = 0;
+        std::vector<u8> inner_buffer;
+    } state{};
+
    static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
                  "KeplerCompute Regs has wrong size");

+    static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32),
+                  "KeplerCompute LaunchParams has wrong size");
+
    /// Write the value to the register identified by method.
    void CallMethod(const GPU::MethodCall& method_call);

 private:
+    Core::System& system;
+    VideoCore::RasterizerInterface& rasterizer;
    MemoryManager& memory_manager;
+    Upload::State upload_state;
+
+    void ProcessLaunch();
 };

 #define ASSERT_REG_POSITION(field_name, position)                                                  \
    static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4,                       \
                  "Field " #field_name " has invalid position")

+#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position)                                         \
+    static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4,               \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(upload, 0x60);
+ASSERT_REG_POSITION(exec_upload, 0x6C);
+ASSERT_REG_POSITION(data_upload, 0x6D);
 ASSERT_REG_POSITION(launch, 0xAF);
+ASSERT_REG_POSITION(tsc, 0x557);
+ASSERT_REG_POSITION(tic, 0x55D);
+ASSERT_REG_POSITION(code_loc, 0x582);
+ASSERT_REG_POSITION(texture_const_buffer_index, 0x982);
+ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8);
+ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC);
+ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11);
+ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12);
+ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14);
+ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D);

 #undef ASSERT_REG_POSITION

--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -14,9 +14,8 @@

 namespace Tegra::Engines {

-KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                           MemoryManager& memory_manager)
-    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
+KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
+    : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {}

 KeplerMemory::~KeplerMemory() = default;

@@ -28,46 +27,18 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {

    switch (method_call.method) {
    case KEPLERMEMORY_REG_INDEX(exec): {
-        ProcessExec();
+        upload_state.ProcessExec(regs.exec.linear != 0);
        break;
    }
    case KEPLERMEMORY_REG_INDEX(data): {
-        ProcessData(method_call.argument, method_call.IsLastCall());
+        const bool is_last_call = method_call.IsLastCall();
+        upload_state.ProcessData(method_call.argument, is_last_call);
+        if (is_last_call) {
+            system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+        }
        break;
    }
    }
 }

-void KeplerMemory::ProcessExec() {
-    state.write_offset = 0;
-    state.copy_size = regs.line_length_in * regs.line_count;
-    state.inner_buffer.resize(state.copy_size);
-}
-
-void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
-    const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
-    std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size);
-    state.write_offset += sub_copy_size;
-    if (is_last_call) {
-        const GPUVAddr address{regs.dest.Address()};
-        if (regs.exec.linear != 0) {
-            memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size);
-        } else {
-            UNIMPLEMENTED_IF(regs.dest.z != 0);
-            UNIMPLEMENTED_IF(regs.dest.depth != 1);
-            UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
-            UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
-            const std::size_t dst_size = Tegra::Texture::CalculateSize(
-                true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
-            std::vector<u8> tmp_buffer(dst_size);
-            memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
-            Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
-                                          regs.dest.y, regs.dest.BlockHeight(), state.copy_size,
-                                          state.inner_buffer.data(), tmp_buffer.data());
-            memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
-        }
-        system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
-    }
-}
-
 } // namespace Tegra::Engines
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -10,6 +10,7 @@
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
+#include "video_core/engines/engine_upload.h"
 #include "video_core/gpu.h"

 namespace Core {
@@ -20,19 +21,20 @@ namespace Tegra {
 class MemoryManager;
 }

-namespace VideoCore {
-class RasterizerInterface;
-}
-
 namespace Tegra::Engines {

+/**
+ * This Engine is known as P2MF. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h
+ */
+
 #define KEPLERMEMORY_REG_INDEX(field_name)                                                         \
    (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))

 class KeplerMemory final {
 public:
-    KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
-                 MemoryManager& memory_manager);
+    KeplerMemory(Core::System& system, MemoryManager& memory_manager);
    ~KeplerMemory();

    /// Write the value to the register identified by method.
@@ -45,42 +47,7 @@ public:
            struct {
                INSERT_PADDING_WORDS(0x60);

-                u32 line_length_in;
-                u32 line_count;
-
-                struct {
-                    u32 address_high;
-                    u32 address_low;
-                    u32 pitch;
-                    union {
-                        BitField<0, 4, u32> block_width;
-                        BitField<4, 4, u32> block_height;
-                        BitField<8, 4, u32> block_depth;
-                    };
-                    u32 width;
-                    u32 height;
-                    u32 depth;
-                    u32 z;
-                    u32 x;
-                    u32 y;
-
-                    GPUVAddr Address() const {
-                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
-                                                     address_low);
-                    }
-
-                    u32 BlockWidth() const {
-                        return 1U << block_width.Value();
-                    }
-
-                    u32 BlockHeight() const {
-                        return 1U << block_height.Value();
-                    }
-
-                    u32 BlockDepth() const {
-                        return 1U << block_depth.Value();
-                    }
-                } dest;
+                Upload::Registers upload;

                struct {
                    union {
@@ -96,28 +63,17 @@ public:
        };
    } regs{};

-    struct {
-        u32 write_offset = 0;
-        u32 copy_size = 0;
-        std::vector<u8> inner_buffer;
-    } state{};
-
 private:
    Core::System& system;
-    VideoCore::RasterizerInterface& rasterizer;
    MemoryManager& memory_manager;
-
-    void ProcessExec();
-    void ProcessData(u32 data, bool is_last_call);
+    Upload::State upload_state;
 };

 #define ASSERT_REG_POSITION(field_name, position)                                                  \
    static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4,                        \
                  "Field " #field_name " has invalid position")

-ASSERT_REG_POSITION(line_length_in, 0x60);
-ASSERT_REG_POSITION(line_count, 0x61);
-ASSERT_REG_POSITION(dest, 0x62);
+ASSERT_REG_POSITION(upload, 0x60);
 ASSERT_REG_POSITION(exec, 0x6C);
 ASSERT_REG_POSITION(data, 0x6D);
 #undef ASSERT_REG_POSITION
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -20,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00;

 Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                     MemoryManager& memory_manager)
-    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{
-                                                                                  *this} {
+    : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
+      macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
    InitializeRegisterDefaults();
 }

@@ -253,6 +253,18 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
        ProcessSyncPoint();
        break;
    }
+    case MAXWELL3D_REG_INDEX(exec_upload): {
+        upload_state.ProcessExec(regs.exec_upload.linear != 0);
+        break;
+    }
+    case MAXWELL3D_REG_INDEX(data_upload): {
+        const bool is_last_call = method_call.IsLastCall();
+        upload_state.ProcessData(method_call.argument, is_last_call);
+        if (is_last_call) {
+            dirty_flags.OnMemoryWrite();
+        }
+        break;
+    }
    default:
        break;
    }
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -14,6 +14,7 @@
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/math_util.h"
+#include "video_core/engines/engine_upload.h"
 #include "video_core/gpu.h"
 #include "video_core/macro_interpreter.h"
 #include "video_core/textures/texture.h"
@@ -32,6 +33,12 @@ class RasterizerInterface;

 namespace Tegra::Engines {

+/**
+ * This Engine is known as GF100_3D. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/graph/gf100_3d.xml
+ * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
+ */
+
 #define MAXWELL3D_REG_INDEX(field_name)                                                            \
    (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))

@@ -243,9 +250,10 @@ public:
                    return "10_10_10_2";
                case Size::Size_11_11_10:
                    return "11_11_10";
+                default:
+                    UNREACHABLE();
+                    return {};
                }
-                UNREACHABLE();
-                return {};
            }

            std::string TypeString() const {
@@ -579,7 +587,18 @@ public:
                    u32 bind;
                } macros;

-                INSERT_PADDING_WORDS(0x69);
+                INSERT_PADDING_WORDS(0x17);
+
+                Upload::Registers upload;
+                struct {
+                    union {
+                        BitField<0, 1, u32> linear;
+                    };
+                } exec_upload;
+
+                u32 data_upload;
+
+                INSERT_PADDING_WORDS(0x44);

                struct {
                    union {
@@ -1175,6 +1194,8 @@ private:
    /// Interpreter for the macro codes uploaded to the GPU.
    MacroInterpreter macro_interpreter;

+    Upload::State upload_state;
+
    /// Retrieves information about a specific TIC entry from the TIC buffer.
    Texture::TICEntry GetTICEntry(u32 tic_index) const;

@@ -1218,6 +1239,9 @@ private:
                  "Field " #field_name " has invalid position")

 ASSERT_REG_POSITION(macros, 0x45);
+ASSERT_REG_POSITION(upload, 0x60);
+ASSERT_REG_POSITION(exec_upload, 0x6C);
+ASSERT_REG_POSITION(data_upload, 0x6D);
 ASSERT_REG_POSITION(sync_info, 0xB2);
 ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
 ASSERT_REG_POSITION(rt, 0x200);
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -83,57 +83,66 @@ void MaxwellDMA::HandleCopy() {

    ASSERT(regs.exec.enable_2d == 1);

-    const std::size_t copy_size = regs.x_count * regs.y_count;
-
-    auto source_ptr{memory_manager.GetPointer(source)};
-    auto dst_ptr{memory_manager.GetPointer(dest)};
-
-    if (!source_ptr) {
-        LOG_ERROR(HW_GPU, "source_ptr is invalid");
-        return;
-    }
-
-    if (!dst_ptr) {
-        LOG_ERROR(HW_GPU, "dst_ptr is invalid");
-        return;
-    }
-
-    const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
-        // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
-        // copying.
-        rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
-
-        // We have to invalidate the destination region to evict any outdated surfaces from the
-        // cache. We do this before actually writing the new data because the destination address
-        // might contain a dirty surface that will have to be written back to memory.
-        rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
-    };
-
    if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
        ASSERT(regs.src_params.size_z == 1);
        // If the input is tiled and the output is linear, deswizzle the input and copy it over.
-
        const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
+        const std::size_t src_size = Texture::CalculateSize(
+            true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
+            regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());

-        FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y,
-                           copy_size * src_bytes_per_pixel);
+        const std::size_t dst_size = regs.dst_pitch * regs.y_count;
+
+        if (read_buffer.size() < src_size) {
+            read_buffer.resize(src_size);
+        }
+
+        if (write_buffer.size() < dst_size) {
+            write_buffer.resize(dst_size);
+        }
+
+        memory_manager.ReadBlock(source, read_buffer.data(), src_size);
+        memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);

        Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
-                                  regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
-                                  regs.src_params.BlockHeight(), regs.src_params.pos_x,
-                                  regs.src_params.pos_y);
+                                  regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(),
+                                  write_buffer.data(), regs.src_params.BlockHeight(),
+                                  regs.src_params.pos_x, regs.src_params.pos_y);
+
+        memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
    } else {
-        ASSERT(regs.dst_params.size_z == 1);
-        ASSERT(regs.src_pitch == regs.x_count);
+        ASSERT(regs.dst_params.BlockDepth() == 1);

-        const u32 src_bpp = regs.src_pitch / regs.x_count;
+        const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;

-        FlushAndInvalidate(regs.src_pitch * regs.y_count,
-                           regs.dst_params.size_x * regs.dst_params.size_y * src_bpp);
+        const std::size_t dst_size = Texture::CalculateSize(
+            true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
+            regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
+
+        const std::size_t dst_layer_size = Texture::CalculateSize(
+            true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
+            regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
+
+        const std::size_t src_size = regs.src_pitch * regs.y_count;
+
+        if (read_buffer.size() < src_size) {
+            read_buffer.resize(src_size);
+        }
+
+        if (write_buffer.size() < dst_size) {
+            write_buffer.resize(dst_size);
+        }
+
+        memory_manager.ReadBlock(source, read_buffer.data(), src_size);
+        memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);

        // If the input is linear and the output is tiled, swizzle the input and copy it over.
        Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
-                                src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
+                                src_bytes_per_pixel,
+                                write_buffer.data() + dst_layer_size * regs.dst_params.pos_z,
+                                read_buffer.data(), regs.dst_params.BlockHeight());
+
+        memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
    }
 }

--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -6,6 +6,7 @@

 #include <array>
 #include <cstddef>
+#include <vector>
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
@@ -25,6 +26,11 @@ class RasterizerInterface;

 namespace Tegra::Engines {

+/**
+ * This Engine is known as GK104_Copy. Documentation can be found in:
+ * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
+ */
+
 class MaxwellDMA final {
 public:
    explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
@@ -63,6 +69,16 @@ public:

        static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");

+        enum class ComponentMode : u32 {
+            Src0 = 0,
+            Src1 = 1,
+            Src2 = 2,
+            Src3 = 3,
+            Const0 = 4,
+            Const1 = 5,
+            Zero = 6,
+        };
+
        enum class CopyMode : u32 {
            None = 0,
            Unk1 = 1,
@@ -128,7 +144,26 @@ public:
                u32 x_count;
                u32 y_count;

-                INSERT_PADDING_WORDS(0xBB);
+                INSERT_PADDING_WORDS(0xB8);
+
+                u32 const0;
+                u32 const1;
+                union {
+                    BitField<0, 4, ComponentMode> component0;
+                    BitField<4, 4, ComponentMode> component1;
+                    BitField<8, 4, ComponentMode> component2;
+                    BitField<12, 4, ComponentMode> component3;
+                    BitField<16, 2, u32> component_size;
+                    BitField<20, 3, u32> src_num_components;
+                    BitField<24, 3, u32> dst_num_components;
+
+                    u32 SrcBytePerPixel() const {
+                        return src_num_components.Value() * component_size.Value();
+                    }
+                    u32 DstBytePerPixel() const {
+                        return dst_num_components.Value() * component_size.Value();
+                    }
+                } swizzle_config;

                Parameters dst_params;

@@ -149,6 +184,9 @@ private:

    MemoryManager& memory_manager;

+    std::vector<u8> read_buffer;
+    std::vector<u8> write_buffer;
+
    /// Performs the copy from the source buffer to the destination buffer as configured in the
    /// registers.
    void HandleCopy();
@@ -165,6 +203,9 @@ ASSERT_REG_POSITION(src_pitch, 0x104);
 ASSERT_REG_POSITION(dst_pitch, 0x105);
 ASSERT_REG_POSITION(x_count, 0x106);
 ASSERT_REG_POSITION(y_count, 0x107);
+ASSERT_REG_POSITION(const0, 0x1C0);
+ASSERT_REG_POSITION(const1, 0x1C1);
+ASSERT_REG_POSITION(swizzle_config, 0x1C2);
 ASSERT_REG_POSITION(dst_params, 0x1C3);
 ASSERT_REG_POSITION(src_params, 0x1CA);

--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -35,9 +35,9 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren
    dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
    maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
    fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
-    kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
+    kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
    maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
-    kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager);
+    kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
 }

 GPU::~GPU() = default;
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -44,7 +44,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
                renderer.Rasterizer().FlushRegion(data->addr, data->size);
            } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
                renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
-            } else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) {
+            } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
                return;
            } else {
                UNREACHABLE();
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -25,6 +25,8 @@ MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : raste
    UpdatePageTableForVMA(initial_vma);
 }

+MemoryManager::~MemoryManager() = default;
+
 GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
    const u64 aligned_size{Common::AlignUp(size, page_size)};
    const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
@@ -199,11 +201,11 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
    return {};
 }

-bool MemoryManager::IsBlockContinous(const GPUVAddr start, const std::size_t size) {
+bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const {
    const GPUVAddr end = start + size;
    const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start));
    const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end));
-    const std::size_t range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
+    const auto range = static_cast<std::size_t>(host_ptr_end - host_ptr_start);
    return range == size;
 }

--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -47,7 +47,8 @@ struct VirtualMemoryArea {

 class MemoryManager final {
 public:
-    MemoryManager(VideoCore::RasterizerInterface& rasterizer);
+    explicit MemoryManager(VideoCore::RasterizerInterface& rasterizer);
+    ~MemoryManager();

    GPUVAddr AllocateSpace(u64 size, u64 align);
    GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
@@ -65,18 +66,18 @@ public:
    u8* GetPointer(GPUVAddr addr);
    const u8* GetPointer(GPUVAddr addr) const;

-    // Returns true if the block is continous in host memory, false otherwise
-    bool IsBlockContinous(const GPUVAddr start, const std::size_t size);
+    /// Returns true if the block is continuous in host memory, false otherwise
+    bool IsBlockContinuous(GPUVAddr start, std::size_t size) const;

    /**
     * ReadBlock and WriteBlock are full read and write operations over virtual
-     * GPU Memory. It's important to use these when GPU memory may not be continous
+     * GPU Memory. It's important to use these when GPU memory may not be continuous
     * in the Host Memory counterpart. Note: This functions cause Host GPU Memory
     * Flushes and Invalidations, respectively to each operation.
     */
-    void ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const;
-    void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size);
-    void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size);
+    void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
+    void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
+    void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);

    /**
     * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
@@ -88,9 +89,9 @@ public:
     * WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture
     * being flushed.
     */
-    void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const;
-    void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size);
-    void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size);
+    void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
+    void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
+    void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);

 private:
    using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
@@ -111,10 +112,10 @@ private:
    /**
     * Maps an unmanaged host memory pointer at a given address.
     *
-     * @param target The guest address to start the mapping at.
-     * @param memory The memory to be mapped.
-     * @param size Size of the mapping.
-     * @param state MemoryState tag to attach to the VMA.
+     * @param target       The guest address to start the mapping at.
+     * @param memory       The memory to be mapped.
+     * @param size         Size of the mapping in bytes.
+     * @param backing_addr The base address of the range to back this mapping.
     */
    VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr);

@@ -124,7 +125,7 @@ private:
    /// Converts a VMAHandle to a mutable VMAIter.
    VMAIter StripIterConstness(const VMAHandle& iter);

-    /// Marks as the specfied VMA as allocated.
+    /// Marks as the specified VMA as allocated.
    VMAIter Allocate(VMAIter vma);

    /**
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -37,9 +37,6 @@ public:
    /// Gets the size of the shader in guest memory, required for cache management
    virtual std::size_t GetSizeInBytes() const = 0;

-    /// Wriets any cached resources back to memory
-    virtual void Flush() = 0;
-
    /// Sets whether the cached object should be considered registered
    void SetIsRegistered(bool registered) {
        is_registered = registered;
@@ -158,6 +155,8 @@ protected:
        return ++modified_ticks;
    }

+    virtual void FlushObjectInner(const T& object) = 0;
+
    /// Flushes the specified object, updating appropriate cache state as needed
    void FlushObject(const T& object) {
        std::lock_guard lock{mutex};
@@ -165,7 +164,7 @@ protected:
        if (!object->IsDirty()) {
            return;
        }
-        object->Flush();
+        FlushObjectInner(object);
        object->MarkAsModified(false, *this);
    }

--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -42,9 +42,6 @@ public:
        return alignment;
    }

-    // We do not have to flush this cache as things in it are never modified by us.
-    void Flush() override {}
-
 private:
    VAddr cpu_addr{};
    std::size_t size{};
@@ -75,6 +72,9 @@ public:
 protected:
    void AlignBuffer(std::size_t alignment);

+    // We do not have to flush this cache as things in it are never modified by us.
+    void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
+
 private:
    OGLStreamBuffer stream_buffer;

--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -46,7 +46,7 @@ public:
    /// Reloads the global region from guest memory
    void Reload(u32 size_);

-    void Flush() override;
+    void Flush();

 private:
    VAddr cpu_addr{};
@@ -65,6 +65,11 @@ public:
    GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
                                 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);

+protected:
+    void FlushObjectInner(const GlobalRegion& object) override {
+        object->Flush();
+    }
+
 private:
    GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
    GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size);
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -261,8 +261,8 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
            // MakeQuadArray always generates u32 indexes
            params.index_format = GL_UNSIGNED_INT;
            params.count = (regs.vertex_buffer.count / 4) * 6;
-            params.index_buffer_offset =
-                primitive_assembler.MakeQuadArray(regs.vertex_buffer.first, params.count);
+            params.index_buffer_offset = primitive_assembler.MakeQuadArray(
+                regs.vertex_buffer.first, regs.vertex_buffer.count);
        }
        return params;
    }
@@ -305,6 +305,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
            case Maxwell::ShaderProgram::Geometry:
                shader_program_manager->UseTrivialGeometryShader();
                break;
+            default:
+                break;
            }
            continue;
        }
@@ -920,8 +922,8 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
        viewport.y = viewport_rect.bottom;
        viewport.width = viewport_rect.GetWidth();
        viewport.height = viewport_rect.GetHeight();
-        viewport.depth_range_far = regs.viewports[i].depth_range_far;
-        viewport.depth_range_near = regs.viewports[i].depth_range_near;
+        viewport.depth_range_far = src.depth_range_far;
+        viewport.depth_range_near = src.depth_range_near;
    }
    state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0;
    state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0;
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -628,9 +628,11 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
 }

 MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
-void CachedSurface::LoadGLBuffer() {
+void CachedSurface::LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) {
    MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
-    gl_buffer.resize(params.max_mip_level);
+    auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
+    if (gl_buffer.size() < params.max_mip_level)
+        gl_buffer.resize(params.max_mip_level);
    for (u32 i = 0; i < params.max_mip_level; i++)
        gl_buffer[i].resize(params.GetMipmapSizeGL(i));
    if (params.is_tiled) {
@@ -671,13 +673,13 @@ void CachedSurface::LoadGLBuffer() {
 }

 MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
-void CachedSurface::FlushGLBuffer() {
+void CachedSurface::FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) {
    MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);

    ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented");

+    auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
    // OpenGL temporary buffer needs to be big enough to store raw texture size
-    gl_buffer.resize(1);
    gl_buffer[0].resize(GetSizeInBytes());

    const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
@@ -713,10 +715,12 @@ void CachedSurface::FlushGLBuffer() {
    }
 }

-void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
-                                          GLuint draw_fb_handle) {
+void CachedSurface::UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
+                                          GLuint read_fb_handle, GLuint draw_fb_handle) {
    const auto& rect{params.GetRect(mip_map)};

+    auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
+
    // Load data from memory to the surface
    const auto x0 = static_cast<GLint>(rect.left);
    const auto y0 = static_cast<GLint>(rect.bottom);
@@ -801,7 +805,6 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
                                tuple.type, &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::TextureCubemap: {
-            std::size_t start = buffer_offset;
            for (std::size_t face = 0; face < params.depth; ++face) {
                glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face),
                                    static_cast<GLsizei>(rect.GetWidth()),
@@ -845,11 +848,12 @@ void CachedSurface::EnsureTextureDiscrepantView() {
 }

 MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
-void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
+void CachedSurface::UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem,
+                                    GLuint read_fb_handle, GLuint draw_fb_handle) {
    MICROPROFILE_SCOPE(OpenGL_TextureUL);

    for (u32 i = 0; i < params.max_mip_level; i++)
-        UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle);
+        UploadGLMipmapTexture(res_cache_tmp_mem, i, read_fb_handle, draw_fb_handle);
 }

 void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
@@ -929,8 +933,8 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
 }

 void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
-    surface->LoadGLBuffer();
-    surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
+    surface->LoadGLBuffer(temporal_memory);
+    surface->UploadGLTexture(temporal_memory, read_framebuffer.handle, draw_framebuffer.handle);
    surface->MarkAsModified(false, *this);
    surface->MarkForReload(false);
 }
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -355,6 +355,12 @@ namespace OpenGL {

 class RasterizerOpenGL;

+// This is used to store temporary big buffers,
+// instead of creating/destroying all the time
+struct RasterizerTemporaryMemory {
+    std::vector<std::vector<u8>> gl_buffer;
+};
+
 class CachedSurface final : public RasterizerCacheObject {
 public:
    explicit CachedSurface(const SurfaceParams& params);
@@ -371,10 +377,6 @@ public:
        return memory_size;
    }

-    void Flush() override {
-        FlushGLBuffer();
-    }
-
    const OGLTexture& Texture() const {
        return texture;
    }
@@ -397,11 +399,12 @@ public:
    }

    // Read/Write data in Switch memory to/from gl_buffer
-    void LoadGLBuffer();
-    void FlushGLBuffer();
+    void LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);
+    void FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);

    // Upload data in gl_buffer to this surface's texture
-    void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
+    void UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, GLuint read_fb_handle,
+                         GLuint draw_fb_handle);

    void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
                       Tegra::Texture::SwizzleSource swizzle_y,
@@ -429,13 +432,13 @@ public:
    }

 private:
-    void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
+    void UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
+                               GLuint read_fb_handle, GLuint draw_fb_handle);

    void EnsureTextureDiscrepantView();

    OGLTexture texture;
    OGLTexture discrepant_view;
-    std::vector<std::vector<u8>> gl_buffer;
    SurfaceParams params{};
    GLenum gl_target{};
    GLenum gl_internal_format{};
@@ -473,6 +476,11 @@ public:
    void SignalPreDrawCall();
    void SignalPostDrawCall();

+protected:
+    void FlushObjectInner(const Surface& object) override {
+        object->FlushGLBuffer(temporal_memory);
+    }
+
 private:
    void LoadSurface(const Surface& surface);
    Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true);
@@ -519,6 +527,8 @@ private:
    std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
    Surface last_depth_buffer;

+    RasterizerTemporaryMemory temporal_memory;
+
    using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
    using SurfaceInterval = typename SurfaceIntervalCache::interval_type;

--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -345,7 +345,7 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,

 ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
                                     const Device& device)
-    : RasterizerCache{rasterizer}, disk_cache{system}, device{device} {}
+    : RasterizerCache{rasterizer}, device{device}, disk_cache{system} {}

 void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
                                      const VideoCore::DiskResourceLoadCallback& callback) {
@@ -363,6 +363,10 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
    if (stop_loading)
        return;

+    // Track if precompiled cache was altered during loading to know if we have to serialize the
+    // virtual precompiled cache file back to the hard drive
+    bool precompiled_cache_altered = false;
+
    // Build shaders
    if (callback)
        callback(VideoCore::LoadCallbackStage::Build, 0, usages.size());
@@ -384,6 +388,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
            if (!shader) {
                // Invalidate the precompiled cache if a shader dumped shader was rejected
                disk_cache.InvalidatePrecompiled();
+                precompiled_cache_altered = true;
                dumps.clear();
            }
        }
@@ -405,8 +410,13 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
        if (dumps.find(usage) == dumps.end()) {
            const auto& program = precompiled_programs.at(usage);
            disk_cache.SaveDump(usage, program->handle);
+            precompiled_cache_altered = true;
        }
    }
+
+    if (precompiled_cache_altered) {
+        disk_cache.SaveVirtualPrecompiledFile();
+    }
 }

 CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -57,9 +57,6 @@ public:
        return shader_length;
    }

-    // We do not have to flush this cache as things in it are never modified by us.
-    void Flush() override {}
-
    /// Gets the shader entries for the shader
    const GLShader::ShaderEntries& GetShaderEntries() const {
        return entries;
@@ -123,6 +120,10 @@ public:
    /// Gets the current specified shader stage program
    Shader GetStageProgram(Maxwell::ShaderProgram program);

+protected:
+    // We do not have to flush this cache as things in it are never modified by us.
+    void FlushObjectInner(const Shader& object) override {}
+
 private:
    std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders(
        const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -871,17 +871,6 @@ private:
        return {};
    }

-    std::string Composite(Operation operation) {
-        std::string value = "vec4(";
-        for (std::size_t i = 0; i < 4; ++i) {
-            value += Visit(operation[i]);
-            if (i < 3)
-                value += ", ";
-        }
-        value += ')';
-        return value;
-    }
-
    template <Type type>
    std::string Add(Operation operation) {
        return GenerateBinaryInfix(operation, "+", type, type, type);
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -104,7 +104,8 @@ bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
    return true;
 }

-ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
+ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system)
+    : system{system}, precompiled_cache_virtual_file_offset{0} {}

 std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
 ShaderDiskCacheOpenGL::LoadTransferable() {
@@ -177,6 +178,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
            return {};
        }
    }
+
    return {{raws, usages}};
 }

@@ -208,59 +210,64 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() {
 std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
                        std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
 ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
+    // Read compressed file from disk and decompress to virtual precompiled cache file
+    std::vector<u8> compressed(file.GetSize());
+    file.ReadBytes(compressed.data(), compressed.size());
+    const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed);
+    SaveArrayToPrecompiled(decompressed.data(), decompressed.size());
+    precompiled_cache_virtual_file_offset = 0;
+
    ShaderCacheVersionHash file_hash{};
-    if (file.ReadArray(file_hash.data(), file_hash.size()) != file_hash.size()) {
+    if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
+        precompiled_cache_virtual_file_offset = 0;
        return {};
    }
    if (GetShaderCacheVersionHash() != file_hash) {
        LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
+        precompiled_cache_virtual_file_offset = 0;
        return {};
    }

    std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
    std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> dumps;
-    while (file.Tell() < file.GetSize()) {
+    while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
        PrecompiledEntryKind kind{};
-        if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
+        if (!LoadObjectFromPrecompiled(kind)) {
            return {};
        }

        switch (kind) {
        case PrecompiledEntryKind::Decompiled: {
            u64 unique_identifier{};
-            if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64))
+            if (!LoadObjectFromPrecompiled(unique_identifier)) {
                return {};
+            }

-            const auto entry = LoadDecompiledEntry(file);
-            if (!entry)
+            const auto entry = LoadDecompiledEntry();
+            if (!entry) {
                return {};
+            }
            decompiled.insert({unique_identifier, std::move(*entry)});
            break;
        }
        case PrecompiledEntryKind::Dump: {
            ShaderDiskCacheUsage usage;
-            if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage))
+            if (!LoadObjectFromPrecompiled(usage)) {
                return {};
+            }

            ShaderDiskCacheDump dump;
-            if (file.ReadBytes(&dump.binary_format, sizeof(u32)) != sizeof(u32))
+            if (!LoadObjectFromPrecompiled(dump.binary_format)) {
                return {};
+            }

            u32 binary_length{};
-            u32 compressed_size{};
-            if (file.ReadBytes(&binary_length, sizeof(u32)) != sizeof(u32) ||
-                file.ReadBytes(&compressed_size, sizeof(u32)) != sizeof(u32)) {
+            if (!LoadObjectFromPrecompiled(binary_length)) {
                return {};
            }

-            std::vector<u8> compressed_binary(compressed_size);
-            if (file.ReadArray(compressed_binary.data(), compressed_binary.size()) !=
-                compressed_binary.size()) {
-                return {};
-            }
-
-            dump.binary = Common::Compression::DecompressDataZSTD(compressed_binary);
-            if (dump.binary.empty()) {
+            dump.binary.resize(binary_length);
+            if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
                return {};
            }

@@ -274,45 +281,41 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
    return {{decompiled, dumps}};
 }

-std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry(
-    FileUtil::IOFile& file) {
+std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry() {
    u32 code_size{};
-    u32 compressed_code_size{};
-    if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
-        file.ReadBytes(&compressed_code_size, sizeof(u32)) != sizeof(u32)) {
+    if (!LoadObjectFromPrecompiled(code_size)) {
        return {};
    }

-    std::vector<u8> compressed_code(compressed_code_size);
-    if (file.ReadArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
+    std::vector<u8> code(code_size);
+    if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
        return {};
    }

-    const std::vector<u8> code = Common::Compression::DecompressDataZSTD(compressed_code);
-    if (code.empty()) {
-        return {};
-    }
    ShaderDiskCacheDecompiled entry;
    entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size);

    u32 const_buffers_count{};
-    if (file.ReadBytes(&const_buffers_count, sizeof(u32)) != sizeof(u32))
+    if (!LoadObjectFromPrecompiled(const_buffers_count)) {
        return {};
+    }
+
    for (u32 i = 0; i < const_buffers_count; ++i) {
        u32 max_offset{};
        u32 index{};
        u8 is_indirect{};
-        if (file.ReadBytes(&max_offset, sizeof(u32)) != sizeof(u32) ||
-            file.ReadBytes(&index, sizeof(u32)) != sizeof(u32) ||
-            file.ReadBytes(&is_indirect, sizeof(u8)) != sizeof(u8)) {
+        if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) ||
+            !LoadObjectFromPrecompiled(is_indirect)) {
            return {};
        }
        entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index);
    }

    u32 samplers_count{};
-    if (file.ReadBytes(&samplers_count, sizeof(u32)) != sizeof(u32))
+    if (!LoadObjectFromPrecompiled(samplers_count)) {
        return {};
+    }
+
    for (u32 i = 0; i < samplers_count; ++i) {
        u64 offset{};
        u64 index{};
@@ -320,12 +323,9 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
        u8 is_array{};
        u8 is_shadow{};
        u8 is_bindless{};
-        if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) ||
-            file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) ||
-            file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) ||
-            file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) ||
-            file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8) ||
-            file.ReadBytes(&is_bindless, sizeof(u8)) != sizeof(u8)) {
+        if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
+            !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) ||
+            !LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) {
            return {};
        }
        entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset),
@@ -335,17 +335,17 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
    }

    u32 global_memory_count{};
-    if (file.ReadBytes(&global_memory_count, sizeof(u32)) != sizeof(u32))
+    if (!LoadObjectFromPrecompiled(global_memory_count)) {
        return {};
+    }
+
    for (u32 i = 0; i < global_memory_count; ++i) {
        u32 cbuf_index{};
        u32 cbuf_offset{};
        u8 is_read{};
        u8 is_written{};
-        if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) ||
-            file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32) ||
-            file.ReadBytes(&is_read, sizeof(u8)) != sizeof(u8) ||
-            file.ReadBytes(&is_written, sizeof(u8)) != sizeof(u8)) {
+        if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) ||
+            !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
            return {};
        }
        entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0,
@@ -354,74 +354,81 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn

    for (auto& clip_distance : entry.entries.clip_distances) {
        u8 clip_distance_raw{};
-        if (file.ReadBytes(&clip_distance_raw, sizeof(u8)) != sizeof(u8))
+        if (!LoadObjectFromPrecompiled(clip_distance_raw))
            return {};
        clip_distance = clip_distance_raw != 0;
    }

    u64 shader_length{};
-    if (file.ReadBytes(&shader_length, sizeof(u64)) != sizeof(u64))
+    if (!LoadObjectFromPrecompiled(shader_length)) {
        return {};
+    }
+
    entry.entries.shader_length = static_cast<std::size_t>(shader_length);

    return entry;
 }

-bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier,
-                                               const std::string& code,
-                                               const std::vector<u8>& compressed_code,
+bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std::string& code,
                                               const GLShader::ShaderEntries& entries) {
-    if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Decompiled)) != 1 ||
-        file.WriteObject(unique_identifier) != 1 ||
-        file.WriteObject(static_cast<u32>(code.size())) != 1 ||
-        file.WriteObject(static_cast<u32>(compressed_code.size())) != 1 ||
-        file.WriteArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
+    if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Decompiled)) ||
+        !SaveObjectToPrecompiled(unique_identifier) ||
+        !SaveObjectToPrecompiled(static_cast<u32>(code.size())) ||
+        !SaveArrayToPrecompiled(code.data(), code.size())) {
        return false;
    }

-    if (file.WriteObject(static_cast<u32>(entries.const_buffers.size())) != 1)
+    if (!SaveObjectToPrecompiled(static_cast<u32>(entries.const_buffers.size()))) {
        return false;
+    }
    for (const auto& cbuf : entries.const_buffers) {
-        if (file.WriteObject(static_cast<u32>(cbuf.GetMaxOffset())) != 1 ||
-            file.WriteObject(static_cast<u32>(cbuf.GetIndex())) != 1 ||
-            file.WriteObject(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0)) != 1) {
+        if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) ||
+            !SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) ||
+            !SaveObjectToPrecompiled(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0))) {
            return false;
        }
    }

-    if (file.WriteObject(static_cast<u32>(entries.samplers.size())) != 1)
+    if (!SaveObjectToPrecompiled(static_cast<u32>(entries.samplers.size()))) {
        return false;
+    }
    for (const auto& sampler : entries.samplers) {
-        if (file.WriteObject(static_cast<u64>(sampler.GetOffset())) != 1 ||
-            file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 ||
-            file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 ||
-            file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 ||
-            file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1 ||
-            file.WriteObject(static_cast<u8>(sampler.IsBindless() ? 1 : 0)) != 1) {
+        if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) ||
+            !SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) ||
+            !SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) ||
+            !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsArray() ? 1 : 0)) ||
+            !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) ||
+            !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsBindless() ? 1 : 0))) {
            return false;
        }
    }

-    if (file.WriteObject(static_cast<u32>(entries.global_memory_entries.size())) != 1)
+    if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) {
        return false;
+    }
    for (const auto& gmem : entries.global_memory_entries) {
-        if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 ||
-            file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1 ||
-            file.WriteObject(static_cast<u8>(gmem.IsRead() ? 1 : 0)) != 1 ||
-            file.WriteObject(static_cast<u8>(gmem.IsWritten() ? 1 : 0)) != 1) {
+        if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) ||
+            !SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) ||
+            !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsRead() ? 1 : 0)) ||
+            !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsWritten() ? 1 : 0))) {
            return false;
        }
    }

    for (const bool clip_distance : entries.clip_distances) {
-        if (file.WriteObject(static_cast<u8>(clip_distance ? 1 : 0)) != 1)
+        if (!SaveObjectToPrecompiled(static_cast<u8>(clip_distance ? 1 : 0))) {
            return false;
+        }
    }

-    return file.WriteObject(static_cast<u64>(entries.shader_length)) == 1;
+    if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) {
+        return false;
+    }
+
+    return true;
 }

-void ShaderDiskCacheOpenGL::InvalidateTransferable() const {
+void ShaderDiskCacheOpenGL::InvalidateTransferable() {
    if (!FileUtil::Delete(GetTransferablePath())) {
        LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
                  GetTransferablePath());
@@ -429,7 +436,10 @@ void ShaderDiskCacheOpenGL::InvalidateTransferable() const {
    InvalidatePrecompiled();
 }

-void ShaderDiskCacheOpenGL::InvalidatePrecompiled() const {
+void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
+    // Clear virtaul precompiled cache file
+    precompiled_cache_virtual_file.Resize(0);
+
    if (!FileUtil::Delete(GetPrecompiledPath())) {
        LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath());
    }
@@ -465,7 +475,10 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
    ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");

    auto& usages{it->second};
-    ASSERT(usages.find(usage) == usages.end());
+    if (usages.find(usage) != usages.end()) {
+        // Skip this variant since the shader is already stored.
+        return;
+    }
    usages.insert(usage);

    FileUtil::IOFile file = AppendTransferableFile();
@@ -485,22 +498,13 @@ void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::str
    if (!IsUsable())
        return;

-    const std::vector<u8> compressed_code{Common::Compression::CompressDataZSTDDefault(
-        reinterpret_cast<const u8*>(code.data()), code.size())};
-    if (compressed_code.empty()) {
-        LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}",
-                  unique_identifier);
-        return;
+    if (precompiled_cache_virtual_file.GetSize() == 0) {
+        SavePrecompiledHeaderToVirtualPrecompiledCache();
    }

-    FileUtil::IOFile file = AppendPrecompiledFile();
-    if (!file.IsOpen())
-        return;
-
-    if (!SaveDecompiledFile(file, unique_identifier, code, compressed_code, entries)) {
+    if (!SaveDecompiledFile(unique_identifier, code, entries)) {
        LOG_ERROR(Render_OpenGL,
                  "Failed to save decompiled entry to the precompiled file - removing");
-        file.Close();
        InvalidatePrecompiled();
    }
 }
@@ -516,28 +520,13 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
    std::vector<u8> binary(binary_length);
    glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());

-    const std::vector<u8> compressed_binary =
-        Common::Compression::CompressDataZSTDDefault(binary.data(), binary.size());
-
-    if (compressed_binary.empty()) {
-        LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}",
-                  usage.unique_identifier);
-        return;
-    }
-
-    FileUtil::IOFile file = AppendPrecompiledFile();
-    if (!file.IsOpen())
-        return;
-
-    if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Dump)) != 1 ||
-        file.WriteObject(usage) != 1 || file.WriteObject(static_cast<u32>(binary_format)) != 1 ||
-        file.WriteObject(static_cast<u32>(binary_length)) != 1 ||
-        file.WriteObject(static_cast<u32>(compressed_binary.size())) != 1 ||
-        file.WriteArray(compressed_binary.data(), compressed_binary.size()) !=
-            compressed_binary.size()) {
+    if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Dump)) ||
+        !SaveObjectToPrecompiled(usage) ||
+        !SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
+        !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
+        !SaveArrayToPrecompiled(binary.data(), binary.size())) {
        LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing",
                  usage.unique_identifier);
-        file.Close();
        InvalidatePrecompiled();
        return;
    }
@@ -570,28 +559,33 @@ FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
    return file;
 }

-FileUtil::IOFile ShaderDiskCacheOpenGL::AppendPrecompiledFile() const {
-    if (!EnsureDirectories())
-        return {};
+void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
+    const auto hash{GetShaderCacheVersionHash()};
+    if (!SaveArrayToPrecompiled(hash.data(), hash.size())) {
+        LOG_ERROR(
+            Render_OpenGL,
+            "Failed to write precompiled cache version hash to virtual precompiled cache file");
+    }
+}
+
+void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
+    precompiled_cache_virtual_file_offset = 0;
+    const std::vector<u8>& uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
+    const std::vector<u8>& compressed =
+        Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());

    const auto precompiled_path{GetPrecompiledPath()};
-    const bool existed = FileUtil::Exists(precompiled_path);
+    FileUtil::IOFile file(precompiled_path, "wb");

-    FileUtil::IOFile file(precompiled_path, "ab");
    if (!file.IsOpen()) {
        LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path);
-        return {};
+        return;
    }
-
-    if (!existed || file.GetSize() == 0) {
-        const auto hash{GetShaderCacheVersionHash()};
-        if (file.WriteArray(hash.data(), hash.size()) != hash.size()) {
-            LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version hash in path={}",
-                      precompiled_path);
-            return {};
-        }
+    if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) {
+        LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
+                  precompiled_path);
+        return;
    }
-    return file;
 }

 bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -16,6 +16,7 @@

 #include "common/assert.h"
 #include "common/common_types.h"
+#include "core/file_sys/vfs_vector.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_opengl/gl_shader_gen.h"

@@ -172,10 +173,10 @@ public:
    LoadPrecompiled();

    /// Removes the transferable (and precompiled) cache file.
-    void InvalidateTransferable() const;
+    void InvalidateTransferable();

-    /// Removes the precompiled cache file.
-    void InvalidatePrecompiled() const;
+    /// Removes the precompiled cache file and clears virtual precompiled cache file.
+    void InvalidatePrecompiled();

    /// Saves a raw dump to the transferable file. Checks for collisions.
    void SaveRaw(const ShaderDiskCacheRaw& entry);
@@ -190,18 +191,21 @@ public:
    /// Saves a dump entry to the precompiled file. Does not check for collisions.
    void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);

+    /// Serializes virtual precompiled shader cache file to real file
+    void SaveVirtualPrecompiledFile();
+
 private:
    /// Loads the transferable cache. Returns empty on failure.
    std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
                            std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
    LoadPrecompiledFile(FileUtil::IOFile& file);

-    /// Loads a decompiled cache entry from the passed file. Returns empty on failure.
-    std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry(FileUtil::IOFile& file);
+    /// Loads a decompiled cache entry from m_precompiled_cache_virtual_file. Returns empty on
+    /// failure.
+    std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry();

    /// Saves a decompiled entry to the passed file. Returns true on success.
-    bool SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier, const std::string& code,
-                            const std::vector<u8>& compressed_code,
+    bool SaveDecompiledFile(u64 unique_identifier, const std::string& code,
                            const GLShader::ShaderEntries& entries);

    /// Returns if the cache can be used
@@ -210,8 +214,8 @@ private:
    /// Opens current game's transferable file and write it's header if it doesn't exist
    FileUtil::IOFile AppendTransferableFile() const;

-    /// Opens current game's precompiled file and write it's header if it doesn't exist
-    FileUtil::IOFile AppendPrecompiledFile() const;
+    /// Save precompiled header to precompiled_cache_in_memory
+    void SavePrecompiledHeaderToVirtualPrecompiledCache();

    /// Create shader disk cache directories. Returns true on success.
    bool EnsureDirectories() const;
@@ -234,10 +238,42 @@ private:
    /// Get current game's title id
    std::string GetTitleID() const;

+    template <typename T>
+    bool SaveArrayToPrecompiled(const T* data, std::size_t length) {
+        const std::size_t write_length = precompiled_cache_virtual_file.WriteArray(
+            data, length, precompiled_cache_virtual_file_offset);
+        precompiled_cache_virtual_file_offset += write_length;
+        return write_length == sizeof(T) * length;
+    }
+
+    template <typename T>
+    bool LoadArrayFromPrecompiled(T* data, std::size_t length) {
+        const std::size_t read_length = precompiled_cache_virtual_file.ReadArray(
+            data, length, precompiled_cache_virtual_file_offset);
+        precompiled_cache_virtual_file_offset += read_length;
+        return read_length == sizeof(T) * length;
+    }
+
+    template <typename T>
+    bool SaveObjectToPrecompiled(const T& object) {
+        return SaveArrayToPrecompiled(&object, 1);
+    }
+
+    template <typename T>
+    bool LoadObjectFromPrecompiled(T& object) {
+        return LoadArrayFromPrecompiled(&object, 1);
+    }
+
    // Copre system
    Core::System& system;
    // Stored transferable shaders
    std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
+    // Stores whole precompiled cache which will be read from or saved to the precompiled chache
+    // file
+    FileSys::VectorVfsFile precompiled_cache_virtual_file;
+    // Stores the current offset of the precompiled cache file for IO purposes
+    std::size_t precompiled_cache_virtual_file_offset;
+
    // The cache has been loaded at boot
    bool tried_to_load{};
 };
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -27,8 +27,7 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
    switch (attrib.type) {
    case Maxwell::VertexAttribute::Type::UnsignedInt:
-    case Maxwell::VertexAttribute::Type::UnsignedNorm: {
-
+    case Maxwell::VertexAttribute::Type::UnsignedNorm:
        switch (attrib.size) {
        case Maxwell::VertexAttribute::Size::Size_8:
        case Maxwell::VertexAttribute::Size::Size_8_8:
@@ -47,16 +46,13 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
            return GL_UNSIGNED_INT;
        case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
            return GL_UNSIGNED_INT_2_10_10_10_REV;
+        default:
+            LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
+            UNREACHABLE();
+            return {};
        }
-
-        LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
-        UNREACHABLE();
-        return {};
-    }
-
    case Maxwell::VertexAttribute::Type::SignedInt:
-    case Maxwell::VertexAttribute::Type::SignedNorm: {
-
+    case Maxwell::VertexAttribute::Type::SignedNorm:
        switch (attrib.size) {
        case Maxwell::VertexAttribute::Size::Size_8:
        case Maxwell::VertexAttribute::Size::Size_8_8:
@@ -75,14 +71,12 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
            return GL_INT;
        case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
            return GL_INT_2_10_10_10_REV;
+        default:
+            LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
+            UNREACHABLE();
+            return {};
        }
-
-        LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
-        UNREACHABLE();
-        return {};
-    }
-
-    case Maxwell::VertexAttribute::Type::Float: {
+    case Maxwell::VertexAttribute::Type::Float:
        switch (attrib.size) {
        case Maxwell::VertexAttribute::Size::Size_16:
        case Maxwell::VertexAttribute::Size::Size_16_16:
@@ -94,13 +88,16 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
        case Maxwell::VertexAttribute::Size::Size_32_32_32:
        case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
            return GL_FLOAT;
+        default:
+            LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
+            UNREACHABLE();
+            return {};
        }
+    default:
+        LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
+        UNREACHABLE();
+        return {};
    }
-    }
-
-    LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
-    UNREACHABLE();
-    return {};
 }

 inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
@@ -129,10 +126,11 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
        return GL_TRIANGLES;
    case Maxwell::PrimitiveTopology::TriangleStrip:
        return GL_TRIANGLE_STRIP;
+    default:
+        LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
+        UNREACHABLE();
+        return {};
    }
-    LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
-    UNREACHABLE();
-    return {};
 }

 inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
@@ -186,9 +184,10 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
        } else {
            return GL_MIRROR_CLAMP_TO_EDGE;
        }
+    default:
+        LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
+        return GL_REPEAT;
    }
-    LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
-    return GL_REPEAT;
 }

 inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -62,9 +62,10 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
    case Tegra::Texture::WrapMode::MirrorOnceBorder:
        UNIMPLEMENTED();
        return vk::SamplerAddressMode::eMirrorClampToEdge;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
+        return {};
    }
-    UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
-    return {};
 }

 vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
@@ -225,9 +226,10 @@ vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
        return vk::PrimitiveTopology::eTriangleList;
    case Maxwell::PrimitiveTopology::TriangleStrip:
        return vk::PrimitiveTopology::eTriangleStrip;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
+        return {};
    }
-    UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
-    return {};
 }

 vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -49,9 +49,6 @@ public:
        return alignment;
    }

-    // We do not have to flush this cache as things in it are never modified by us.
-    void Flush() override {}
-
 private:
    VAddr cpu_addr{};
    std::size_t size{};
@@ -87,6 +84,10 @@ public:
        return buffer_handle;
    }

+protected:
+    // We do not have to flush this cache as things in it are never modified by us.
+    void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
+
 private:
    void AlignBuffer(std::size_t alignment);

--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -315,7 +315,6 @@ private:
        constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry",
                                                                         "overflow"};
        for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
-            const auto flag_code = static_cast<InternalFlag>(flag);
            const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
            internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
        }
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -116,6 +116,8 @@ ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {
            // Continue scanning for an exit method.
            break;
        }
+        default:
+            break;
        }
    }
    return exit_method = ExitMethod::AlwaysReturn;
@@ -206,4 +208,4 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
    return pc + 1;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -9,6 +9,7 @@

 namespace VideoCommon::Shader {

+using Tegra::Shader::HalfType;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;

@@ -22,7 +23,6 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
            LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
        }
    }
-    UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented");

    const bool negate_a =
        opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
@@ -32,35 +32,37 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
    Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
    op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a);

-    Node op_b = [&]() {
+    auto [type_b, op_b] = [&]() -> std::tuple<HalfType, Node> {
        switch (opcode->get().GetId()) {
        case OpCode::Id::HADD2_C:
        case OpCode::Id::HMUL2_C:
-            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+            return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
        case OpCode::Id::HADD2_R:
        case OpCode::Id::HMUL2_R:
-            return GetRegister(instr.gpr20);
+            return {instr.alu_half.type_b, GetRegister(instr.gpr20)};
        default:
            UNREACHABLE();
-            return Immediate(0);
+            return {HalfType::F32, Immediate(0)};
        }
    }();
-    op_b = UnpackHalfFloat(op_b, instr.alu_half.type_b);
-    op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
+    op_b = UnpackHalfFloat(op_b, type_b);
+    // redeclaration to avoid a bug in clang with reusing local bindings in lambdas
+    Node op_b_alt = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);

    Node value = [&]() {
        switch (opcode->get().GetId()) {
        case OpCode::Id::HADD2_C:
        case OpCode::Id::HADD2_R:
-            return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
+            return Operation(OperationCode::HAdd, PRECISE, op_a, op_b_alt);
        case OpCode::Id::HMUL2_C:
        case OpCode::Id::HMUL2_R:
-            return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
+            return Operation(OperationCode::HMul, PRECISE, op_a, op_b_alt);
        default:
            UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
            return Immediate(0);
        }
    }();
+    value = GetSaturatedHalfFloat(value, instr.alu_half.saturate);
    value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);

    SetRegister(bb, instr.gpr0, value);
@@ -68,4 +70,4 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -120,10 +120,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
                return Operation(OperationCode::FCeil, PRECISE, value);
            case Tegra::Shader::F2fRoundingOp::Trunc:
                return Operation(OperationCode::FTrunc, PRECISE, value);
+            default:
+                UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
+                                  static_cast<u32>(instr.conversion.f2f.rounding.Value()));
+                return Immediate(0);
            }
-            UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
-                              static_cast<u32>(instr.conversion.f2f.rounding.Value()));
-            return Immediate(0);
        }();
        value = GetSaturatedFloat(value, instr.alu.saturate_d);

--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -34,15 +34,14 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
        case OpCode::Id::HFMA2_CR:
            neg_b = instr.hfma2.negate_b;
            neg_c = instr.hfma2.negate_c;
-            return {instr.hfma2.saturate, instr.hfma2.type_b,
+            return {instr.hfma2.saturate, HalfType::F32,
                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
                    instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
        case OpCode::Id::HFMA2_RC:
            neg_b = instr.hfma2.negate_b;
            neg_c = instr.hfma2.negate_c;
            return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
-                    instr.hfma2.type_b,
-                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
+                    HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
        case OpCode::Id::HFMA2_RR:
            neg_b = instr.hfma2.rr.negate_b;
            neg_c = instr.hfma2.rr.negate_c;
@@ -56,13 +55,13 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
            return {false, identity, Immediate(0), identity, Immediate(0)};
        }
    }();
-    UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");

    const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
    op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
    op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);

    Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
+    value = GetSaturatedHalfFloat(value, saturate);
    value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);

    SetRegister(bb, instr.gpr0, value);
@@ -70,4 +69,4 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -540,8 +540,6 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
 Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
                            bool is_array, bool is_aoffi) {
    const std::size_t coord_count = GetCoordCount(texture_type);
-    const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
-    const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);

    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -56,9 +56,10 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
                    instr.xmad.mode,
                    Immediate(static_cast<u32>(instr.xmad.imm20_16)),
                    GetRegister(instr.gpr39)};
+        default:
+            UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
+            return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
        }
-        UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
-        return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
    }();

    op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16);
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -439,11 +439,14 @@ Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
        return OperationCode::LogicalUGreaterEqual;
    case OperationCode::INegate:
        UNREACHABLE_MSG("Can't negate an unsigned integer");
+        return {};
    case OperationCode::IAbsolute:
        UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
+        return {};
+    default:
+        UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
+        return {};
    }
-    UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
-    return {};
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -178,39 +178,44 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
            return PixelFormat::ABGR8S;
        case Tegra::Texture::ComponentType::UINT:
            return PixelFormat::ABGR8UI;
+        default:
+            break;
        }
-        LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
-        UNREACHABLE();
+        break;
    case Tegra::Texture::TextureFormat::B5G6R5:
        switch (component_type) {
        case Tegra::Texture::ComponentType::UNORM:
            return PixelFormat::B5G6R5U;
+        default:
+            break;
        }
-        LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
-        UNREACHABLE();
+        break;
    case Tegra::Texture::TextureFormat::A2B10G10R10:
        switch (component_type) {
        case Tegra::Texture::ComponentType::UNORM:
            return PixelFormat::A2B10G10R10U;
+        default:
+            break;
        }
-        LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
-        UNREACHABLE();
+        break;
    case Tegra::Texture::TextureFormat::A1B5G5R5:
        switch (component_type) {
        case Tegra::Texture::ComponentType::UNORM:
            return PixelFormat::A1B5G5R5U;
+        default:
+            break;
        }
-        LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
-        UNREACHABLE();
+        break;
    case Tegra::Texture::TextureFormat::R8:
        switch (component_type) {
        case Tegra::Texture::ComponentType::UNORM:
            return PixelFormat::R8U;
        case Tegra::Texture::ComponentType::UINT:
            return PixelFormat::R8UI;
+        default:
+            break;
        }
-        LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
-        UNREACHABLE();
+        break;
    case Tegra::Texture::TextureFormat::G8R8:
        // TextureFormat::G8R8 is actually ordered red then green, as such we can use
        // PixelFormat::RG8U and PixelFormat::RG8S. This was tested with The Legend of Zelda: Breath
@@ -220,50 +225,55 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
            return PixelFormat::RG8U;
        case Tegra::Texture::ComponentType::SNORM:
            return PixelFormat::RG8S;
+        default:
+            break;
        }
-        LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
-        UNREACHABLE();
+        break;
    case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
        switch (component_type) {
        case Tegra::Texture::ComponentType::UNORM:
            return PixelFormat::RGBA16U;
        case Tegra::Texture::ComponentType::FLOAT:
            return PixelFormat::RGBA16F;
+        default:
+            break;
        }
-        LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
-        UNREACHABLE();
+        break;
    case Tegra::Texture::TextureFormat::BF10GF11RF11:
        switch (component_type) {
        case Tegra::Texture::ComponentType::FLOAT:
            return PixelFormat::R11FG11FB10F;
+        default:
+            break;
        }
-        LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
-        UNREACHABLE();
    case Tegra::Texture::TextureFormat::R32_G32_B32_A32:
        switch (component_type) {
        case Tegra::Texture::ComponentType::FLOAT:
            return PixelFormat::RGBA32F;
        case Tegra::Texture::ComponentType::UINT:
            return PixelFormat::RGBA32UI;
+        default:
+            break;
        }
-        LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
-        UNREACHABLE();
+        break;
    case Tegra::Texture::TextureFormat::R32_G32:
        switch (component_type) {
        case Tegra::Texture::ComponentType::FLOAT:
            return PixelFormat::RG32F;
        case Tegra::Texture::ComponentType::UINT:
            return PixelFormat::RG32UI;
+        default:
+            break;
        }
-        LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
-        UNREACHABLE();
+        break;
    case Tegra::Texture::TextureFormat::R32_G32_B32:
        switch (component_type) {
        case Tegra::Texture::ComponentType::FLOAT:
            return PixelFormat::RGB32F;
+        default:
+            break;
        }
-        LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
-        UNREACHABLE();
+        break;
    case Tegra::Texture::TextureFormat::R16:
        switch (component_type) {
        case Tegra::Texture::ComponentType::FLOAT:
@@ -276,18 +286,20 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
            return PixelFormat::R16UI;
        case Tegra::Texture::ComponentType::SINT:
            return PixelFormat::R16I;
+        default:
+            break;
        }
-        LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
-        UNREACHABLE();
+        break;
    case Tegra::Texture::TextureFormat::R32:
        switch (component_type) {
        case Tegra::Texture::ComponentType::FLOAT:
            return PixelFormat::R32F;
        case Tegra::Texture::ComponentType::UINT:
            return PixelFormat::R32UI;
+        default:
+            break;
        }
-        LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
-        UNREACHABLE();
+        break;
    case Tegra::Texture::TextureFormat::ZF32:
        return PixelFormat::Z32F;
    case Tegra::Texture::TextureFormat::Z16:
@@ -310,9 +322,10 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
            return PixelFormat::DXN2UNORM;
        case Tegra::Texture::ComponentType::SNORM:
            return PixelFormat::DXN2SNORM;
+        default:
+            break;
        }
-        LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
-        UNREACHABLE();
+        break;
    case Tegra::Texture::TextureFormat::BC7U:
        return is_srgb ? PixelFormat::BC7U_SRGB : PixelFormat::BC7U;
    case Tegra::Texture::TextureFormat::BC6H_UF16:
@@ -343,15 +356,17 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
            return PixelFormat::RG16UI;
        case Tegra::Texture::ComponentType::SINT:
            return PixelFormat::RG16I;
+        default:
+            break;
        }
-        LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
-        UNREACHABLE();
+        break;
    default:
-        LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}", static_cast<u32>(format),
-                     static_cast<u32>(component_type));
-        UNREACHABLE();
-        return PixelFormat::ABGR8U;
+        break;
    }
+    LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}", static_cast<u32>(format),
+                 static_cast<u32>(component_type));
+    UNREACHABLE();
+    return PixelFormat::ABGR8U;
 }

 ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {
@@ -513,8 +528,9 @@ bool IsFormatBCn(PixelFormat format) {
    case PixelFormat::DXT45_SRGB:
    case PixelFormat::BC7U_SRGB:
        return true;
+    default:
+        return false;
    }
-    return false;
 }

 } // namespace VideoCore::Surface
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -25,8 +25,8 @@

 class InputBitStream {
 public:
-    explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0)
-        : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
+    explicit InputBitStream(const unsigned char* ptr, int start_offset = 0)
+        : m_CurByte(ptr), m_NextBit(start_offset % 8) {}

    ~InputBitStream() = default;

@@ -55,12 +55,9 @@ public:
    }

 private:
-    const int m_NumBits;
    const unsigned char* m_CurByte;
    int m_NextBit = 0;
    int m_BitsRead = 0;
-
-    bool done = false;
 };

 class OutputBitStream {
@@ -114,7 +111,6 @@ private:
    const int m_NumBits;
    unsigned char* m_CurByte;
    int m_NextBit = 0;
-    int m_BitsRead = 0;

    bool done = false;
 };
@@ -1616,6 +1612,7 @@ namespace Tegra::Texture::ASTC {
 std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
                                uint32_t depth, uint32_t block_width, uint32_t block_height) {
    uint32_t blockIdx = 0;
+    std::size_t depth_offset = 0;
    std::vector<uint8_t> outData(height * width * depth * 4);
    for (uint32_t k = 0; k < depth; k++) {
        for (uint32_t j = 0; j < height; j += block_height) {
@@ -1630,7 +1627,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he
                uint32_t decompWidth = std::min(block_width, width - i);
                uint32_t decompHeight = std::min(block_height, height - j);

-                uint8_t* outRow = outData.data() + (j * width + i) * 4;
+                uint8_t* outRow = depth_offset + outData.data() + (j * width + i) * 4;
                for (uint32_t jj = 0; jj < decompHeight; jj++) {
                    memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
                }
@@ -1638,6 +1635,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he
                blockIdx++;
            }
        }
+        depth_offset += height * width * 4;
    }

    return outData;
--- a/src/yuzu/compatdb.cpp
+++ b/src/yuzu/compatdb.cpp
@@ -58,7 +58,7 @@ void CompatDB::Submit() {

        button(NextButton)->setEnabled(false);
        button(NextButton)->setText(tr("Submitting"));
-        button(QWizard::CancelButton)->setVisible(false);
+        button(CancelButton)->setVisible(false);

        testcase_watcher.setFuture(QtConcurrent::run(
            [] { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); }));
@@ -74,12 +74,12 @@ void CompatDB::OnTestcaseSubmitted() {
                              tr("An error occured while sending the Testcase"));
        button(NextButton)->setEnabled(true);
        button(NextButton)->setText(tr("Next"));
-        button(QWizard::CancelButton)->setVisible(true);
+        button(CancelButton)->setVisible(true);
    } else {
        next();
        // older versions of QT don't support the "NoCancelButtonOnLastPage" option, this is a
        // workaround
-        button(QWizard::CancelButton)->setVisible(false);
+        button(CancelButton)->setVisible(false);
    }
 }

--- a/src/yuzu/configuration/configure_dialog.cpp
+++ b/src/yuzu/configuration/configure_dialog.cpp
@@ -17,8 +17,12 @@ ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry)
    ui->hotkeysTab->Populate(registry);
    this->setConfiguration();
    this->PopulateSelectionList();
+
+    setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint);
+
    connect(ui->selectorList, &QListWidget::itemSelectionChanged, this,
            &ConfigureDialog::UpdateVisibleTabs);
+
    adjustSize();
    ui->selectorList->setCurrentRow(0);

--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -69,16 +69,20 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent)
 ConfigureGraphics::~ConfigureGraphics() = default;

 void ConfigureGraphics::setConfiguration() {
+    const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
+
    ui->resolution_factor_combobox->setCurrentIndex(
        static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
    ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit);
    ui->frame_limit->setValue(Settings::values.frame_limit);
+    ui->use_compatibility_profile->setEnabled(runtime_lock);
    ui->use_compatibility_profile->setChecked(Settings::values.use_compatibility_profile);
+    ui->use_disk_shader_cache->setEnabled(runtime_lock);
    ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
    ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
-    ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn());
+    ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock);
    ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation);
-    ui->force_30fps_mode->setEnabled(!Core::System::GetInstance().IsPoweredOn());
+    ui->force_30fps_mode->setEnabled(runtime_lock);
    ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode);
    UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
                                                 Settings::values.bg_blue));
--- a/src/yuzu/hotkeys.h
+++ b/src/yuzu/hotkeys.h
@@ -67,8 +67,6 @@ public:

 private:
    struct Hotkey {
-        Hotkey() : shortcut(nullptr), context(Qt::WindowShortcut) {}
-
        QKeySequence keyseq;
        QShortcut* shortcut = nullptr;
        Qt::ShortcutContext context = Qt::WindowShortcut;
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
@@ -176,9 +176,13 @@ EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) {

    SDL_SetMainReady();

+    const SDL_GLprofile profile = Settings::values.use_compatibility_profile
+                                      ? SDL_GL_CONTEXT_PROFILE_COMPATIBILITY
+                                      : SDL_GL_CONTEXT_PROFILE_CORE;
+
    SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
    SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3);
-    SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
+    SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, profile);
    SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
    SDL_GL_SetAttribute(SDL_GL_RED_SIZE, 8);
    SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE, 8);
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -222,6 +222,7 @@ int main(int argc, char** argv) {

    system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL");

+    emu_window->MakeCurrent();
    system.Renderer().Rasterizer().LoadDiskResources();

    while (emu_window->IsOpen()) {
Author	SHA1	Message	Date
ReinUsesLisp	a652e58c54	gl_rasterizer: Pass the right number of array quad vertices count	2019-05-17 17:08:34 -03:00
bunnei	6f1720a5b7	Merge pull request #2477 from ReinUsesLisp/fix-sdl2 yuzu_cmd: Make OpenGL's context current	2019-05-17 13:04:33 -04:00
bunnei	865025f612	Merge pull request #2478 from ReinUsesLisp/sdl2-compat yuzu_cmd: Use OpenGL compat when asked in the settings	2019-05-17 13:04:04 -04:00
bunnei	1975d32f2d	Merge pull request #2479 from ReinUsesLisp/qt-shadow qt/configure_graphics: Shadow options at runtime	2019-05-17 13:01:41 -04:00
ReinUsesLisp	4cf64f8e09	qt/configure_graphics: Shadow options at runtime Compatibility profile and the disk shader cache settings shouldn't be changed at runtime. This aims to address that shadowing those options.	2019-05-17 04:29:20 -03:00
ReinUsesLisp	69265e4504	yuzu_cmd: Use OpenGL compat when asked in the settings	2019-05-17 04:25:26 -03:00
ReinUsesLisp	5f877d9458	yuzu_cmd: Make OpenGL's context current The SDL2 frontend never bound the OpenGL context, resulting on a white screen and no-ops all over the backend.	2019-05-17 04:13:20 -03:00
Mat M	c4d549919f	Merge pull request #2462 from lioncash/video-mm video_core/memory_manager: Minor tidying	2019-05-14 06:40:33 -04:00
Mat M	dadcf317dc	Merge pull request #2461 from lioncash/unused-var video_core: Remove a few unused variables and functions	2019-05-14 06:36:26 -04:00
Mat M	8b933e77cd	Merge pull request #2460 from lioncash/volatile CMakeLists: Specify /volatile:iso for MSVC	2019-05-14 06:34:53 -04:00
Mat M	3e8e335a5c	Merge pull request #2450 from lioncash/warn-level CMakeLists: Explicitly specify -Wall for the non-MSVC case	2019-05-14 06:34:05 -04:00
Rodrigo Locatti	940a71089d	Merge pull request #2413 from FernandoS27/opt-gpu Rasterizer Cache: refactor flushing & optimize memory usage of surfaces	2019-05-13 23:01:59 -03:00
Lioncash	716fbaef74	video_core/memory_manager: Mark IsBlockContinuous() as a const member function Corrects the typo in its name and marks the function as a const member function, given it doesn't actually modify memory manager state.	2019-05-09 19:14:36 -04:00
Lioncash	d4bcd006b2	video_core/memory_manager: Mark the constructor as explicit Prevents implicit converting constructions of the memory manager.	2019-05-09 19:10:26 -04:00
Lioncash	fd12788967	video_core/memory_manager: Default the destructor within the cpp file Makes the class less surprising when it comes to forward declaring the type, and also prevents inlining the destruction code of the class, given it contains non-trivial types.	2019-05-09 19:10:13 -04:00
Lioncash	53afe47cec	video_core/memory_manager: Amend doxygen comments Corrects references to non-existent parameters and corrects typos.	2019-05-09 19:09:19 -04:00
Lioncash	5235b053b4	video_core/memory_manager: Remove superfluous const from function declarations These are able to be omitted from the declaration of functions, since they don't do anything at the type system level. The definitions of the functions can retain the use of const though, since they make the variables immutable in the implementation of the function where they're used.	2019-05-09 18:59:49 -04:00
Lioncash	b6408e9671	video_core/renderer_opengl/gl_shader_cache: Correct member initialization order Silences a -Wreorder warning.	2019-05-09 18:55:47 -04:00
Lioncash	e43ba3acd4	video_core/shader/decode/texture: Remove unused variable from GetTld4Code()	2019-05-09 18:49:56 -04:00
Lioncash	e3c45b4338	renderer_vulkan/vk_shader_decompiler: Remove unused variable from DeclareInternalFlags()	2019-05-09 18:47:48 -04:00
Lioncash	175fe8aaeb	video_core/renderer_opengl/gl_shader_decompiler: Remove unused Composite() function This isn't used at all, so it can be removed.	2019-05-09 18:45:26 -04:00
Lioncash	6d28d288a3	video_core/renderer_opengl/gl_rasterizer_cache: Remove unused variable in UploadGLMipmapTexture() This variable is unused entirely, so it can be removed.	2019-05-09 18:42:48 -04:00
Lioncash	ba165b1092	video_core/gpu_thread: Remove unused local variable Instead of retrieving the data from the std::variant instance, we can just check if the variant contains that type of data. This is essentially the same behavior, only it returns a bool indicating whether or not the type in the variant is currently active, instead of actually retrieving the data.	2019-05-09 18:39:21 -04:00
Lioncash	c56d893e77	video_core/textures/astc: Remove unused variables Silences a few compilation warnings.	2019-05-09 18:33:36 -04:00
Lioncash	c4d03f0154	CMakeLists: Specify /volatile:iso for MSVC By default, MSVC doesn't use standards-compliant volatile semantics. This makes it behave in a standards-compliant manner, making expectations more uniform across compilers.	2019-05-09 15:49:30 -04:00
bunnei	7cb17834c7	Merge pull request #2437 from lioncash/audctl service/audctl: Update documentation comments to be relative to 8.0.0	2019-05-09 13:24:13 -04:00
bunnei	f3317cf2db	Merge pull request #2454 from lioncash/cflag src/CMakeLists: Add /Zc:externConstexpr to the MSVC build flags	2019-05-09 13:23:49 -04:00
bunnei	daca045fcd	Merge pull request #2442 from FernandoS27/astc-fix Fix Layered ASTC Textures	2019-05-09 13:23:14 -04:00
bunnei	f69d3a6351	Merge pull request #2443 from ReinUsesLisp/skip-repeated-variants gl_shader_disk_cache: Skip stored shader variants instead of asserting	2019-05-09 13:22:42 -04:00
bunnei	5907619a04	Merge pull request #2445 from FearlessTobi/port-4749 Port citra-emu/citra#4749: "web_service: Misc fixes"	2019-05-09 13:22:00 -04:00
bunnei	9567b3a293	Merge pull request #2458 from lioncash/hotkey yuzu/hotkeys: Remove unnecessary constructor	2019-05-09 13:21:36 -04:00
bunnei	c6f3831320	Merge pull request #2456 from lioncash/qualifier yuzu/compatdb: Remove unnecessary qualifiers	2019-05-09 13:21:04 -04:00
bunnei	8abf0add04	Merge pull request #2459 from lioncash/what configure_dialog: Remove the Whats This? button from the dialog	2019-05-09 13:20:12 -04:00
bunnei	5b6571c170	Merge pull request #2453 from lioncash/enum core/memory: Remove unused FlushMode enum	2019-05-09 13:19:49 -04:00
bunnei	c27b81cb85	Merge pull request #2429 from FernandoS27/compute Corrections and Implementation on GPU Engines	2019-05-09 13:19:22 -04:00
bunnei	0e9a17b029	Merge pull request #2440 from lioncash/dynarmic externals: Update dynarmic to master	2019-05-09 13:18:49 -04:00
Lioncash	f3c18d622e	configure_dialog: Remove the Whats This? button from the dialog	2019-05-09 03:20:13 -04:00
Lioncash	8bdef4f951	yuzu/hotkeys: Remove unnecessary constructor The behavior of the Hotkey constructor is already accomplished via in-class member initializers, so the constructor is superfluous here.	2019-05-09 02:17:22 -04:00
Lioncash	a97120efc1	yuzu/compatdb: Remove unnecessary qualifiers Keeps the code consistent in regards to how the buttons are referred to.	2019-05-09 01:08:06 -04:00
Lioncash	70c6506a7e	src/CMakeLists: Add /Zc:externConstexpr to the MSVC build flags The C++ standard allows constexpr variables declared with the extern keyword to have external linkage. Previously MSVC wasn't abiding by this. This just makes the compiler more standards compliant during builds. Given we currently don't make use of anything that would break by this, this is safe to enable.	2019-05-07 14:06:22 -04:00
Lioncash	6ca7241bd9	src/CMakeLists: Vertically order compilation flags Makes it much nicer to visually scan the options. This also starts the flag descriptions from the same column for the same reason.	2019-05-07 14:05:48 -04:00
Lioncash	495a8d8d95	core/memory: Remove unused FlushMode enum Recent changes to memory-related code resulted in this being unused, so we can remove it.	2019-05-07 13:55:17 -04:00
Lioncash	0964444529	externals: Update dynarmic to master Better instruction support has been added since the last update.	2019-05-07 10:39:25 -04:00
Merry	c63e68c480	Merge pull request #2451 from lioncash/travis travis: Update to using Xcode 10.2	2019-05-07 15:36:13 +01:00
Lioncash	4aefd45193	travis: Update to using Xcode 10.2 Keeps the CI toolchain updated. This is also necessary for updating dynarmic.	2019-05-07 06:40:30 -04:00
Rodrigo Locatti	6743982d28	Merge pull request #2447 from lioncash/dtor core/frontend/emu_window: Make GraphicsContext's destructor virtual	2019-05-07 05:54:04 -03:00
Rodrigo Locatti	57db3f6763	Merge pull request #2448 from lioncash/pragma common/zstd_compression: Remove #pragma once directive from source file	2019-05-07 05:51:37 -03:00
Rodrigo Locatti	a206418846	Merge pull request #2449 from lioncash/unused-var gl_rasterizer: Silence unused variable warnings	2019-05-07 05:50:59 -03:00
zhupengfei	10c4f23953	core/telemetry_session: Only create the backend when we really need it The backend is not used until we decide to submit the testcase/telemetry, and creating it early prevents users from updating the credentials properly while the games are running.	2019-05-04 19:45:48 +02:00
Lioncash	9e15193ef8	shader/decode/texture: Remove unused variable This isn't used anywhere, so we can get rid of it.	2019-05-04 02:10:38 -04:00
Lioncash	5d0dca73c6	CMakeLists: Explicitly specify -Wall for the non-MSVC case Ensures that -Wall is always active as a compilation flag.	2019-05-04 02:06:56 -04:00
Lioncash	08b270676b	gl_rasterizer: Silence unused variable warning Makes use of src, so it's not considered unused.	2019-05-04 02:00:17 -04:00
Lioncash	a6f7a44aab	common/zstd_compression: Remove #pragma once directive from source file Introduced in `72477731ed`. This is only necessary within header files.	2019-05-04 01:54:29 -04:00
Lioncash	1230a0e7ce	core/frontend/emu_window: Make GraphicsContext's destructor virtual This class is used in a polymorphic context, so destruction of the context will lead to undefined behavior if the destructor isn't virtual.	2019-05-04 01:47:38 -04:00
bunnei	1f72bb733f	Merge pull request #2408 from FearlessTobi/port-4215 Port citra-emu/citra#4215: "travis: Use Ninja for Travis builds"	2019-05-02 20:59:09 -04:00
Fernando Sahmkow	e64c41efe8	Refactors and name corrections.	2019-05-01 15:31:39 -04:00
ReinUsesLisp	4aa081b4e7	gl_shader_disk_cache: Skip stored shader variants instead of asserting Instead of asserting on already stored shader variants, silently skip them. This shouldn't be happening but when a shader is invalidated and it is not stored in the shader cache, this assert would hit and save that shader anyways when the asserts are disabled.	2019-05-01 00:36:11 -03:00
Fernando Sahmkow	95261639fb	Fix Layered ASTC Textures By adding the missing layer offset in ASTC compression.	2019-04-30 23:02:31 -04:00
Lioncash	75a8b304d4	loader/nso: Remove left-in debug pragma Unintentionally introduced in `552d5071fa`	2019-04-30 22:55:53 -04:00
bunnei	fb420358a9	Merge pull request #2406 from FearlessTobi/port-3839 Port citra-emu/citra#3839: "travis: use prebuilt image"	2019-04-30 19:25:53 -04:00
bunnei	79e54abe19	Merge pull request #2100 from FreddyFunk/disk-cache-precompiled-file gl_shader_disk_cache: Improve precompiled shader cache generation speed and size	2019-04-30 19:24:01 -04:00
bunnei	91e239d66f	Merge pull request #2435 from ReinUsesLisp/misc-vc shader_ir: Miscellaneous fixes	2019-04-28 22:29:43 -04:00
bunnei	2be32eb3d2	Merge pull request #2412 from lioncash/system kernel/vm_manager: Remove usages of global system accessors	2019-04-28 22:27:14 -04:00
bunnei	c52233ec8b	Merge pull request #2322 from ReinUsesLisp/wswitch video_core: Silent -Wswitch warnings	2019-04-28 22:24:58 -04:00
bunnei	9a3737120d	Merge pull request #2423 from FernandoS27/half-correct Corrections on Half Float operations: HADD2 HMUL2 and HFMA2	2019-04-28 22:24:22 -04:00
Lioncash	565fce71b1	service/audctl: Update documentation comments to be relative to 8.0.0 The state of these service calls are still the same in version 8.0.0.	2019-04-27 23:17:58 -04:00
FreddyFunk	1a3ff252a4	Re added new lines at the end of files	2019-04-23 23:19:28 +02:00
unknown	3091b40691	gl_shader_disk_cache: Compress precompiled shader cache file with Zstandard	2019-04-23 22:24:31 +02:00
unknown	9db2c734c9	gl_shader_disk_cache: Use VectorVfsFile for the virtual precompiled shader cache file	2019-04-23 22:24:23 +02:00
unknown	3fe542cf60	gl_shader_disk_cache: Remove per shader compression	2019-04-23 21:40:01 +02:00
Fernando Sahmkow	b3118ee316	Fixes and Corrections to DMA Engine	2019-04-23 15:28:18 -04:00
Fernando Sahmkow	f1e5314f1a	Add Swizzle Parameters to the DMA engine	2019-04-23 11:21:00 -04:00
Fernando Sahmkow	e140e2ebc6	Add Documentation Headers to all the GPU Engines	2019-04-23 08:44:52 -04:00
Fernando Sahmkow	021d28c9b8	Corrections and styling	2019-04-23 08:02:24 -04:00
Fernando Sahmkow	701ce1c9d0	Implement Maxwell3D Data Upload	2019-04-22 19:27:36 -04:00
Fernando Sahmkow	e4ff140b99	Introduce skeleton of the GPU Compute Engine.	2019-04-22 19:05:43 -04:00
Fernando Sahmkow	a91d3fc639	Revamp Kepler Memory to use a subegine to manage uploads	2019-04-22 18:50:56 -04:00
Fernando Sahmkow	4c36b78567	Rasterizer Cache: Use a temporal storage for Surfaces loading/flushing. This PR should heavily reduce memory usage since temporal buffers are no longer stored per Surface but instead managed by the Rasterizer Cache.	2019-04-21 11:42:07 -04:00
Fernando Sahmkow	623b2e4b8f	Corrections Half Float operations on const buffers and implement saturation.	2019-04-20 21:11:33 -04:00
Fernando Sahmkow	a3eb91ed8c	RasterizerCache Redesign: Flush flushing is now responsability of children caches instead of the cache object. This change will allow the specific cache to pass extra parameters on flushing and will allow more flexibility.	2019-04-19 20:44:56 -04:00
ReinUsesLisp	fbe8d1ceaa	video_core: Silent -Wswitch warnings	2019-04-18 15:54:39 -03:00
Lioncash	b6a87b422e	kernel/vm_manager: Remove usages of global system accessors Makes the dependency on the system instance explicit within VMManager's interface.	2019-04-16 20:02:50 -04:00
Cameron Cawley	1f3cc036da	travis: Use Ninja for Travis builds	2019-04-16 01:06:34 +02:00
fearlessTobi	b67be7154d	GenerateSCMRev: fix Travis compilation on repo forks	2019-04-16 00:34:22 +02:00
liushuyu	a9f58593d4	travis: use prebuilt image (#3839 ) * travis: use prebuilt image * travis: use prebuilt image (MinGW)	2019-04-15 22:22:09 +02:00