gl_shader_decompiler: Re-implement TLDS lod

Merge pull request #2108 from FernandoS27/fix-cc
Fix incorrect value for CC bit in IADD
2019-02-12 17:03:07 -03:00 · 2019-02-12 10:39:03 -05:00 · 2019-02-12 10:20:29 -05:00 · 2019-02-12 10:20:15 -05:00 · 2019-02-11 18:46:45 -04:00 · 2019-02-11 16:44:43 -04:00
101 changed files with 2716 additions and 1338 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -419,19 +419,6 @@ function(create_target_directory_groups target_name)
    endforeach()
 endfunction()

-# Gets a UTC timstamp and sets the provided variable to it
-function(get_timestamp _var)
-    string(TIMESTAMP timestamp UTC)
-    set(${_var} "${timestamp}" PARENT_SCOPE)
-endfunction()
-
-# generate git/build information
-include(GetGitRevisionDescription)
-get_git_head_revision(GIT_REF_SPEC GIT_REV)
-git_describe(GIT_DESC --always --long --dirty)
-git_branch_name(GIT_BRANCH)
-get_timestamp(BUILD_DATE)
-
 enable_testing()
 add_subdirectory(externals)
 add_subdirectory(src)
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -0,0 +1,94 @@
+# Gets a UTC timestamp and stores it in the caller-scope variable whose name is passed as _var
+function(get_timestamp _var)
+    string(TIMESTAMP timestamp UTC)
+    set(${_var} "${timestamp}" PARENT_SCOPE)
+endfunction()
+
+list(APPEND CMAKE_MODULE_PATH "${SRC_DIR}/externals/cmake-modules")
+# generate git/build information: fills GIT_REF_SPEC, GIT_REV, GIT_DESC, GIT_BRANCH and BUILD_DATE
+include(GetGitRevisionDescription)
+get_git_head_revision(GIT_REF_SPEC GIT_REV)
+git_describe(GIT_DESC --always --long --dirty)
+git_branch_name(GIT_BRANCH)
+get_timestamp(BUILD_DATE)
+
+# Derive REPO_NAME, BUILD_VERSION and BUILD_FULLNAME for the scm_rev.cpp template from the CI inputs
+# If this is a CI build, the build name (e.g. Nightly, Canary) is added to the scm_rev file as well
+set(REPO_NAME "")
+set(BUILD_VERSION "0")
+if (BUILD_REPOSITORY)
+  # regex capture the string nightly or canary into CMAKE_MATCH_1 (inputs are quoted so each stays a single argument)
+  string(REGEX MATCH "yuzu-emu/yuzu-?(.*)" OUTVAR "${BUILD_REPOSITORY}")
+  if (CMAKE_MATCH_COUNT GREATER 0)
+    # capitalize the first letter of each '-'-separated word in the repo name and join the words.
+    string(REPLACE "-" ";" REPO_NAME_LIST "${CMAKE_MATCH_1}")
+    foreach(WORD ${REPO_NAME_LIST})
+      string(SUBSTRING "${WORD}" 0 1 FIRST_LETTER)
+      string(SUBSTRING "${WORD}" 1 -1 REMAINDER)
+      string(TOUPPER "${FIRST_LETTER}" FIRST_LETTER)
+      set(REPO_NAME "${REPO_NAME}${FIRST_LETTER}${REMAINDER}")
+    endforeach()
+    if (BUILD_TAG)
+      string(REGEX MATCH "${CMAKE_MATCH_1}-([0-9]+)" OUTVAR "${BUILD_TAG}")
+      if (CMAKE_MATCH_COUNT GREATER 0)
+        set(BUILD_VERSION "${CMAKE_MATCH_1}")
+      endif()
+      if (BUILD_VERSION)
+        # This leaves a trailing space on the last word, but we actually want that
+        # because of how it's styled in the title bar.
+        set(BUILD_FULLNAME "${REPO_NAME} ${BUILD_VERSION} ")
+      else()
+        set(BUILD_FULLNAME "")
+      endif()
+    endif()
+  endif()
+endif()
+
+# The variable SRC_DIR must be passed into the script with -D: it runs in script mode (cmake -P), where CMAKE_SOURCE_DIR and the other CMAKE_*_DIR variables refer to the current working (build) directory
+set(VIDEO_CORE "${SRC_DIR}/src/video_core")
+set(HASH_FILES
+    "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
+    "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
+    "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
+    "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
+    "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
+    "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
+    "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp"
+    "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
+    "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
+    "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
+    "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
+    "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp"
+    "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp"
+    "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp"
+    "${VIDEO_CORE}/shader/decode/bfe.cpp"
+    "${VIDEO_CORE}/shader/decode/bfi.cpp"
+    "${VIDEO_CORE}/shader/decode/conversion.cpp"
+    "${VIDEO_CORE}/shader/decode/ffma.cpp"
+    "${VIDEO_CORE}/shader/decode/float_set.cpp"
+    "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp"
+    "${VIDEO_CORE}/shader/decode/half_set.cpp"
+    "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
+    "${VIDEO_CORE}/shader/decode/hfma2.cpp"
+    "${VIDEO_CORE}/shader/decode/integer_set.cpp"
+    "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
+    "${VIDEO_CORE}/shader/decode/memory.cpp"
+    "${VIDEO_CORE}/shader/decode/other.cpp"
+    "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
+    "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
+    "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
+    "${VIDEO_CORE}/shader/decode/shift.cpp"
+    "${VIDEO_CORE}/shader/decode/video.cpp"
+    "${VIDEO_CORE}/shader/decode/xmad.cpp"
+    "${VIDEO_CORE}/shader/decode.cpp"
+    "${VIDEO_CORE}/shader/shader_ir.cpp"
+    "${VIDEO_CORE}/shader/shader_ir.h"
+    "${VIDEO_CORE}/shader/track.cpp"
+)
+set(COMBINED "")
+foreach (F IN LISTS HASH_FILES)
+    file(READ ${F} TMP)
+    set(COMBINED "${COMBINED}${TMP}")
+endforeach()
+string(MD5 SHADER_CACHE_VERSION "${COMBINED}")
+configure_file("${SRC_DIR}/src/common/scm_rev.cpp.in" "scm_rev.cpp" @ONLY)
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -1,42 +1,69 @@
-# Generate cpp with Git revision from template
-# Also if this is a CI build, add the build name (ie: Nightly, Canary) to the scm_rev file as well
-set(REPO_NAME "")
-set(BUILD_VERSION "0")
-if ($ENV{CI})
-  if ($ENV{TRAVIS})
+# Add a custom command to generate a new shader_cache_version hash when any of the following files change
+# NOTE: This is an approximation of what files affect shader generation, it's possible something else
+# could affect the result, but much more unlikely than the following files. Keeping a list of files
+# like this allows for much better caching since it doesn't force the user to recompile binary shaders every update
+set(VIDEO_CORE "${CMAKE_SOURCE_DIR}/src/video_core")
+if (DEFINED ENV{CI})
+  if (DEFINED ENV{TRAVIS})
    set(BUILD_REPOSITORY $ENV{TRAVIS_REPO_SLUG})
    set(BUILD_TAG $ENV{TRAVIS_TAG})
-  elseif($ENV{APPVEYOR})
+  elseif(DEFINED ENV{APPVEYOR})
    set(BUILD_REPOSITORY $ENV{APPVEYOR_REPO_NAME})
    set(BUILD_TAG $ENV{APPVEYOR_REPO_TAG_NAME})
  endif()
-  # regex capture the string nightly or canary into CMAKE_MATCH_1
-  string(REGEX MATCH "yuzu-emu/yuzu-?(.*)" OUTVAR ${BUILD_REPOSITORY})
-  if (${CMAKE_MATCH_COUNT} GREATER 0)
-    # capitalize the first letter of each word in the repo name.
-    string(REPLACE "-" ";" REPO_NAME_LIST ${CMAKE_MATCH_1})
-    foreach(WORD ${REPO_NAME_LIST})
-      string(SUBSTRING ${WORD} 0 1 FIRST_LETTER)
-      string(SUBSTRING ${WORD} 1 -1 REMAINDER)
-      string(TOUPPER ${FIRST_LETTER} FIRST_LETTER)
-      set(REPO_NAME "${REPO_NAME}${FIRST_LETTER}${REMAINDER}")
-    endforeach()
-    if (BUILD_TAG)
-      string(REGEX MATCH "${CMAKE_MATCH_1}-([0-9]+)" OUTVAR ${BUILD_TAG})
-      if (${CMAKE_MATCH_COUNT} GREATER 0)
-        set(BUILD_VERSION ${CMAKE_MATCH_1})
-      endif()
-      if (BUILD_VERSION)
-        # This leaves a trailing space on the last word, but we actually want that
-        # because of how it's styled in the title bar.
-        set(BUILD_FULLNAME "${REPO_NAME} ${BUILD_VERSION} ")
-      else()
-        set(BUILD_FULLNAME "")
-      endif()
-    endif()
-  endif()
 endif()
-configure_file("${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp" @ONLY)
+add_custom_command(OUTPUT scm_rev.cpp
+    COMMAND ${CMAKE_COMMAND}
+      -DSRC_DIR="${CMAKE_SOURCE_DIR}"
+      -DBUILD_REPOSITORY="${BUILD_REPOSITORY}"
+      -DBUILD_TAG="${BUILD_TAG}"
+      -P "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake"
+    DEPENDS
+      # WARNING! It was too much work to try and make a common location for this list,
+      # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
+      "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
+      "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
+      "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
+      "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
+      "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
+      "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
+      "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp"
+      "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
+      "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
+      "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
+      "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
+      "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp"
+      "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp"
+      "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp"
+      "${VIDEO_CORE}/shader/decode/bfe.cpp"
+      "${VIDEO_CORE}/shader/decode/bfi.cpp"
+      "${VIDEO_CORE}/shader/decode/conversion.cpp"
+      "${VIDEO_CORE}/shader/decode/ffma.cpp"
+      "${VIDEO_CORE}/shader/decode/float_set.cpp"
+      "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp"
+      "${VIDEO_CORE}/shader/decode/half_set.cpp"
+      "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
+      "${VIDEO_CORE}/shader/decode/hfma2.cpp"
+      "${VIDEO_CORE}/shader/decode/integer_set.cpp"
+      "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
+      "${VIDEO_CORE}/shader/decode/memory.cpp"
+      "${VIDEO_CORE}/shader/decode/other.cpp"
+      "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
+      "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
+      "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
+      "${VIDEO_CORE}/shader/decode/shift.cpp"
+      "${VIDEO_CORE}/shader/decode/video.cpp"
+      "${VIDEO_CORE}/shader/decode/xmad.cpp"
+      "${VIDEO_CORE}/shader/decode.cpp"
+      "${VIDEO_CORE}/shader/shader_ir.cpp"
+      "${VIDEO_CORE}/shader/shader_ir.h"
+      "${VIDEO_CORE}/shader/track.cpp"
+      # and also check that the scm_rev files haven't changed
+      "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in"
+      "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h"
+      # technically we should regenerate if the git version changed, but it's not worth the effort imo
+      "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake"
+)

 add_library(common STATIC
    alignment.h
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -34,7 +34,6 @@
 #include <limits>
 #include <type_traits>
 #include "common/common_funcs.h"
-#include "common/swap.h"

 /*
 * Abstract bitfield class
@@ -109,9 +108,15 @@
 * symptoms.
 */
 #pragma pack(1)
-template <std::size_t Position, std::size_t Bits, typename T, typename EndianTag = LETag>
+template <std::size_t Position, std::size_t Bits, typename T>
 struct BitField {
 private:
+    // We hide the copy assignment operator here, because the default copy
+    // assignment would copy the full storage value, rather than just the bits
+    // relevant to this particular bit field.
+    // We don't delete it because we want BitField to be trivially copyable.
+    constexpr BitField& operator=(const BitField&) = default;
+
    // UnderlyingType is T for non-enum types and the underlying type of T if
    // T is an enumeration. Note that T is wrapped within an enable_if in the
    // former case to workaround compile errors which arise when using
@@ -122,11 +127,7 @@ private:
    // We store the value as the unsigned type to avoid undefined behaviour on value shifting
    using StorageType = std::make_unsigned_t<UnderlyingType>;

-    using StorageTypeWithEndian = typename AddEndian<StorageType, EndianTag>::type;
-
 public:
-    BitField& operator=(const BitField&) = default;
-
    /// Constants to allow limited introspection of fields if needed
    static constexpr std::size_t position = Position;
    static constexpr std::size_t bits = Bits;
@@ -171,7 +172,7 @@ public:
    }

    constexpr FORCE_INLINE void Assign(const T& value) {
-        storage = (static_cast<StorageType>(storage) & ~mask) | FormatValue(value);
+        storage = (storage & ~mask) | FormatValue(value);
    }

    constexpr T Value() const {
@@ -183,7 +184,7 @@ public:
    }

 private:
-    StorageTypeWithEndian storage;
+    StorageType storage;

    static_assert(bits + position <= 8 * sizeof(T), "Bitfield out of range");

@@ -194,6 +195,3 @@ private:
    static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable in a BitField");
 };
 #pragma pack()
-
-template <std::size_t Position, std::size_t Bits, typename T>
-using BitFieldBE = BitField<Position, Bits, T, BETag>;
--- a/src/common/common_paths.h
+++ b/src/common/common_paths.h
@@ -35,6 +35,7 @@
 #define KEYS_DIR "keys"
 #define LOAD_DIR "load"
 #define DUMP_DIR "dump"
+#define SHADER_DIR "shader"
 #define LOG_DIR "log"

 // Filenames
--- a/src/common/file_util.cpp
+++ b/src/common/file_util.cpp
@@ -710,6 +710,7 @@ const std::string& GetUserPath(UserPath path, const std::string& new_path) {
        paths.emplace(UserPath::NANDDir, user_path + NAND_DIR DIR_SEP);
        paths.emplace(UserPath::LoadDir, user_path + LOAD_DIR DIR_SEP);
        paths.emplace(UserPath::DumpDir, user_path + DUMP_DIR DIR_SEP);
+        paths.emplace(UserPath::ShaderDir, user_path + SHADER_DIR DIR_SEP);
        paths.emplace(UserPath::SysDataDir, user_path + SYSDATA_DIR DIR_SEP);
        paths.emplace(UserPath::KeysDir, user_path + KEYS_DIR DIR_SEP);
        // TODO: Put the logs in a better location for each OS
--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -31,6 +31,7 @@ enum class UserPath {
    SDMCDir,
    LoadDir,
    DumpDir,
+    ShaderDir,
    SysDataDir,
    UserDir,
 };
--- a/src/common/scm_rev.cpp.in
+++ b/src/common/scm_rev.cpp.in
@@ -11,6 +11,7 @@
 #define BUILD_DATE   "@BUILD_DATE@"
 #define BUILD_FULLNAME "@BUILD_FULLNAME@"
 #define BUILD_VERSION "@BUILD_VERSION@"
+#define SHADER_CACHE_VERSION "@SHADER_CACHE_VERSION@"

 namespace Common {

@@ -21,6 +22,7 @@ const char g_build_name[]   = BUILD_NAME;
 const char g_build_date[]   = BUILD_DATE;
 const char g_build_fullname[] = BUILD_FULLNAME;
 const char g_build_version[]  = BUILD_VERSION;
+const char g_shader_cache_version[] = SHADER_CACHE_VERSION;

 } // namespace

--- a/src/common/scm_rev.h
+++ b/src/common/scm_rev.h
@@ -13,5 +13,6 @@ extern const char g_build_name[];
 extern const char g_build_date[];
 extern const char g_build_fullname[];
 extern const char g_build_version[];
+extern const char g_shader_cache_version[];

 } // namespace Common
--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -17,8 +17,6 @@

 #pragma once

-#include <type_traits>
-
 #if defined(_MSC_VER)
 #include <cstdlib>
 #elif defined(__linux__)
@@ -172,7 +170,7 @@ struct swap_struct_t {
    using swapped_t = swap_struct_t;

 protected:
-    T value;
+    T value = T();

    static T swap(T v) {
        return F::swap(v);
@@ -607,154 +605,52 @@ struct swap_double_t {
    }
 };

-template <typename T>
-struct swap_enum_t {
-    static_assert(std::is_enum_v<T>);
-    using base = std::underlying_type_t<T>;
-
-public:
-    swap_enum_t() = default;
-    swap_enum_t(const T& v) : value(swap(v)) {}
-
-    swap_enum_t& operator=(const T& v) {
-        value = swap(v);
-        return *this;
-    }
-
-    operator T() const {
-        return swap(value);
-    }
-
-    explicit operator base() const {
-        return static_cast<base>(swap(value));
-    }
-
-protected:
-    T value{};
-    // clang-format off
-    using swap_t = std::conditional_t<
-        std::is_same_v<base, u16>, swap_16_t<u16>, std::conditional_t<
-        std::is_same_v<base, s16>, swap_16_t<s16>, std::conditional_t<
-        std::is_same_v<base, u32>, swap_32_t<u32>, std::conditional_t<
-        std::is_same_v<base, s32>, swap_32_t<s32>, std::conditional_t<
-        std::is_same_v<base, u64>, swap_64_t<u64>, std::conditional_t<
-        std::is_same_v<base, s64>, swap_64_t<s64>, void>>>>>>;
-    // clang-format on
-    static T swap(T x) {
-        return static_cast<T>(swap_t::swap(static_cast<base>(x)));
-    }
-};
-
-struct SwapTag {}; // Use the different endianness from the system
-struct KeepTag {}; // Use the same endianness as the system
-
-template <typename T, typename Tag>
-struct AddEndian;
-
-// KeepTag specializations
-
-template <typename T>
-struct AddEndian<T, KeepTag> {
-    using type = T;
-};
-
-// SwapTag specializations
-
-template <>
-struct AddEndian<u8, SwapTag> {
-    using type = u8;
-};
-
-template <>
-struct AddEndian<u16, SwapTag> {
-    using type = swap_struct_t<u16, swap_16_t<u16>>;
-};
-
-template <>
-struct AddEndian<u32, SwapTag> {
-    using type = swap_struct_t<u32, swap_32_t<u32>>;
-};
-
-template <>
-struct AddEndian<u64, SwapTag> {
-    using type = swap_struct_t<u64, swap_64_t<u64>>;
-};
-
-template <>
-struct AddEndian<s8, SwapTag> {
-    using type = s8;
-};
-
-template <>
-struct AddEndian<s16, SwapTag> {
-    using type = swap_struct_t<s16, swap_16_t<s16>>;
-};
-
-template <>
-struct AddEndian<s32, SwapTag> {
-    using type = swap_struct_t<s32, swap_32_t<s32>>;
-};
-
-template <>
-struct AddEndian<s64, SwapTag> {
-    using type = swap_struct_t<s64, swap_64_t<s64>>;
-};
-
-template <>
-struct AddEndian<float, SwapTag> {
-    using type = swap_struct_t<float, swap_float_t<float>>;
-};
-
-template <>
-struct AddEndian<double, SwapTag> {
-    using type = swap_struct_t<double, swap_double_t<double>>;
-};
-
-template <typename T>
-struct AddEndian<T, SwapTag> {
-    static_assert(std::is_enum_v<T>);
-    using type = swap_enum_t<T>;
-};
-
-// Alias LETag/BETag as KeepTag/SwapTag depending on the system
 #if COMMON_LITTLE_ENDIAN
+using u16_le = u16;
+using u32_le = u32;
+using u64_le = u64;

-using LETag = KeepTag;
-using BETag = SwapTag;
+using s16_le = s16;
+using s32_le = s32;
+using s64_le = s64;

+using float_le = float;
+using double_le = double;
+
+using u64_be = swap_struct_t<u64, swap_64_t<u64>>;
+using s64_be = swap_struct_t<s64, swap_64_t<s64>>;
+
+using u32_be = swap_struct_t<u32, swap_32_t<u32>>;
+using s32_be = swap_struct_t<s32, swap_32_t<s32>>;
+
+using u16_be = swap_struct_t<u16, swap_16_t<u16>>;
+using s16_be = swap_struct_t<s16, swap_16_t<s16>>;
+
+using float_be = swap_struct_t<float, swap_float_t<float>>;
+using double_be = swap_struct_t<double, swap_double_t<double>>;
 #else

-using BETag = KeepTag;
-using LETag = SwapTag;
+using u64_le = swap_struct_t<u64, swap_64_t<u64>>;
+using s64_le = swap_struct_t<s64, swap_64_t<s64>>;
+
+using u32_le = swap_struct_t<u32, swap_32_t<u32>>;
+using s32_le = swap_struct_t<s32, swap_32_t<s32>>;
+
+using u16_le = swap_struct_t<u16, swap_16_t<u16>>;
+using s16_le = swap_struct_t<s16, swap_16_t<s16>>;
+
+using float_le = swap_struct_t<float, swap_float_t<float>>;
+using double_le = swap_struct_t<double, swap_double_t<double>>;
+
+using u16_be = u16;
+using u32_be = u32;
+using u64_be = u64;
+
+using s16_be = s16;
+using s32_be = s32;
+using s64_be = s64;
+
+using float_be = float;
+using double_be = double;

 #endif
-
-// Aliases for LE types
-using u16_le = AddEndian<u16, LETag>::type;
-using u32_le = AddEndian<u32, LETag>::type;
-using u64_le = AddEndian<u64, LETag>::type;
-
-using s16_le = AddEndian<s16, LETag>::type;
-using s32_le = AddEndian<s32, LETag>::type;
-using s64_le = AddEndian<s64, LETag>::type;
-
-template <typename T>
-using enum_le = std::enable_if_t<std::is_enum_v<T>, typename AddEndian<T, LETag>::type>;
-
-using float_le = AddEndian<float, LETag>::type;
-using double_le = AddEndian<double, LETag>::type;
-
-// Aliases for BE types
-using u16_be = AddEndian<u16, BETag>::type;
-using u32_be = AddEndian<u32, BETag>::type;
-using u64_be = AddEndian<u64, BETag>::type;
-
-using s16_be = AddEndian<s16, BETag>::type;
-using s32_be = AddEndian<s32, BETag>::type;
-using s64_be = AddEndian<s64, BETag>::type;
-
-template <typename T>
-using enum_be = std::enable_if_t<std::is_enum_v<T>, typename AddEndian<T, BETag>::type>;
-
-using float_be = AddEndian<float, BETag>::type;
-using double_be = AddEndian<double, BETag>::type;
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -123,7 +123,7 @@ struct System::Impl {
        Service::Init(service_manager, *virtual_filesystem);
        GDBStub::Init();

-        renderer = VideoCore::CreateRenderer(emu_window);
+        renderer = VideoCore::CreateRenderer(emu_window, system);
        if (!renderer->Init()) {
            return ResultStatus::ErrorVideoCore;
        }
@@ -175,6 +175,7 @@ struct System::Impl {
            return static_cast<ResultStatus>(static_cast<u32>(ResultStatus::ErrorLoader) +
                                             static_cast<u32>(load_result));
        }
+
        status = ResultStatus::Success;
        return status;
    }
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -507,8 +507,11 @@ static void RemoveBreakpoint(BreakpointType type, VAddr addr) {

    LOG_DEBUG(Debug_GDBStub, "gdb: removed a breakpoint: {:016X} bytes at {:016X} of type {}",
              bp->second.len, bp->second.addr, static_cast<int>(type));
-    Memory::WriteBlock(bp->second.addr, bp->second.inst.data(), bp->second.inst.size());
-    Core::System::GetInstance().InvalidateCpuInstructionCaches();
+
+    if (type == BreakpointType::Execute) {
+        Memory::WriteBlock(bp->second.addr, bp->second.inst.data(), bp->second.inst.size());
+        Core::System::GetInstance().InvalidateCpuInstructionCaches();
+    }
    p.erase(addr);
 }

@@ -1057,9 +1060,12 @@ static bool CommitBreakpoint(BreakpointType type, VAddr addr, u64 len) {
    breakpoint.addr = addr;
    breakpoint.len = len;
    Memory::ReadBlock(addr, breakpoint.inst.data(), breakpoint.inst.size());
+
    static constexpr std::array<u8, 4> btrap{0x00, 0x7d, 0x20, 0xd4};
-    Memory::WriteBlock(addr, btrap.data(), btrap.size());
-    Core::System::GetInstance().InvalidateCpuInstructionCaches();
+    if (type == BreakpointType::Execute) {
+        Memory::WriteBlock(addr, btrap.data(), btrap.size());
+        Core::System::GetInstance().InvalidateCpuInstructionCaches();
+    }
    p.insert({addr, breakpoint});

    LOG_DEBUG(Debug_GDBStub, "gdb: added {} breakpoint: {:016X} bytes at {:016X}",
--- a/src/core/hle/ipc.h
+++ b/src/core/hle/ipc.h
@@ -39,10 +39,10 @@ struct CommandHeader {
    union {
        u32_le raw_low;
        BitField<0, 16, CommandType> type;
-        BitField<16, 4, u32> num_buf_x_descriptors;
-        BitField<20, 4, u32> num_buf_a_descriptors;
-        BitField<24, 4, u32> num_buf_b_descriptors;
-        BitField<28, 4, u32> num_buf_w_descriptors;
+        BitField<16, 4, u32_le> num_buf_x_descriptors;
+        BitField<20, 4, u32_le> num_buf_a_descriptors;
+        BitField<24, 4, u32_le> num_buf_b_descriptors;
+        BitField<28, 4, u32_le> num_buf_w_descriptors;
    };

    enum class BufferDescriptorCFlag : u32 {
@@ -53,28 +53,28 @@ struct CommandHeader {

    union {
        u32_le raw_high;
-        BitField<0, 10, u32> data_size;
+        BitField<0, 10, u32_le> data_size;
        BitField<10, 4, BufferDescriptorCFlag> buf_c_descriptor_flags;
-        BitField<31, 1, u32> enable_handle_descriptor;
+        BitField<31, 1, u32_le> enable_handle_descriptor;
    };
 };
 static_assert(sizeof(CommandHeader) == 8, "CommandHeader size is incorrect");

 union HandleDescriptorHeader {
    u32_le raw_high;
-    BitField<0, 1, u32> send_current_pid;
-    BitField<1, 4, u32> num_handles_to_copy;
-    BitField<5, 4, u32> num_handles_to_move;
+    BitField<0, 1, u32_le> send_current_pid;
+    BitField<1, 4, u32_le> num_handles_to_copy;
+    BitField<5, 4, u32_le> num_handles_to_move;
 };
 static_assert(sizeof(HandleDescriptorHeader) == 4, "HandleDescriptorHeader size is incorrect");

 struct BufferDescriptorX {
    union {
-        BitField<0, 6, u32> counter_bits_0_5;
-        BitField<6, 3, u32> address_bits_36_38;
-        BitField<9, 3, u32> counter_bits_9_11;
-        BitField<12, 4, u32> address_bits_32_35;
-        BitField<16, 16, u32> size;
+        BitField<0, 6, u32_le> counter_bits_0_5;
+        BitField<6, 3, u32_le> address_bits_36_38;
+        BitField<9, 3, u32_le> counter_bits_9_11;
+        BitField<12, 4, u32_le> address_bits_32_35;
+        BitField<16, 16, u32_le> size;
    };

    u32_le address_bits_0_31;
@@ -103,10 +103,10 @@ struct BufferDescriptorABW {
    u32_le address_bits_0_31;

    union {
-        BitField<0, 2, u32> flags;
-        BitField<2, 3, u32> address_bits_36_38;
-        BitField<24, 4, u32> size_bits_32_35;
-        BitField<28, 4, u32> address_bits_32_35;
+        BitField<0, 2, u32_le> flags;
+        BitField<2, 3, u32_le> address_bits_36_38;
+        BitField<24, 4, u32_le> size_bits_32_35;
+        BitField<28, 4, u32_le> address_bits_32_35;
    };

    VAddr Address() const {
@@ -128,8 +128,8 @@ struct BufferDescriptorC {
    u32_le address_bits_0_31;

    union {
-        BitField<0, 16, u32> address_bits_32_47;
-        BitField<16, 16, u32> size;
+        BitField<0, 16, u32_le> address_bits_32_47;
+        BitField<16, 16, u32_le> size;
    };

    VAddr Address() const {
@@ -167,8 +167,8 @@ struct DomainMessageHeader {
        struct {
            union {
                BitField<0, 8, CommandType> command;
-                BitField<8, 8, u32> input_object_count;
-                BitField<16, 16, u32> size;
+                BitField<8, 8, u32_le> input_object_count;
+                BitField<16, 16, u32_le> size;
            };
            u32_le object_id;
            INSERT_PADDING_WORDS(2);
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -322,14 +322,15 @@ void ISelfController::SetScreenShotImageOrientation(Kernel::HLERequestContext& c

 void ISelfController::CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx) {
    LOG_WARNING(Service_AM, "(STUBBED) called");
+
    // TODO(Subv): Find out how AM determines the display to use, for now just
    // create the layer in the Default display.
-    u64 display_id = nvflinger->OpenDisplay("Default");
-    u64 layer_id = nvflinger->CreateLayer(display_id);
+    const auto display_id = nvflinger->OpenDisplay("Default");
+    const auto layer_id = nvflinger->CreateLayer(*display_id);

    IPC::ResponseBuilder rb{ctx, 4};
    rb.Push(RESULT_SUCCESS);
-    rb.Push(layer_id);
+    rb.Push(*layer_id);
 }

 void ISelfController::SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx) {
--- a/src/core/hle/service/hid/controllers/debug_pad.h
+++ b/src/core/hle/service/hid/controllers/debug_pad.h
@@ -41,20 +41,20 @@ private:
    struct PadState {
        union {
            u32_le raw{};
-            BitField<0, 1, u32> a;
-            BitField<1, 1, u32> b;
-            BitField<2, 1, u32> x;
-            BitField<3, 1, u32> y;
-            BitField<4, 1, u32> l;
-            BitField<5, 1, u32> r;
-            BitField<6, 1, u32> zl;
-            BitField<7, 1, u32> zr;
-            BitField<8, 1, u32> plus;
-            BitField<9, 1, u32> minus;
-            BitField<10, 1, u32> d_left;
-            BitField<11, 1, u32> d_up;
-            BitField<12, 1, u32> d_right;
-            BitField<13, 1, u32> d_down;
+            BitField<0, 1, u32_le> a;
+            BitField<1, 1, u32_le> b;
+            BitField<2, 1, u32_le> x;
+            BitField<3, 1, u32_le> y;
+            BitField<4, 1, u32_le> l;
+            BitField<5, 1, u32_le> r;
+            BitField<6, 1, u32_le> zl;
+            BitField<7, 1, u32_le> zr;
+            BitField<8, 1, u32_le> plus;
+            BitField<9, 1, u32_le> minus;
+            BitField<10, 1, u32_le> d_left;
+            BitField<11, 1, u32_le> d_up;
+            BitField<12, 1, u32_le> d_right;
+            BitField<13, 1, u32_le> d_down;
        };
    };
    static_assert(sizeof(PadState) == 0x4, "PadState is an invalid size");
@@ -62,7 +62,7 @@ private:
    struct Attributes {
        union {
            u32_le raw{};
-            BitField<0, 1, u32> connected;
+            BitField<0, 1, u32_le> connected;
        };
    };
    static_assert(sizeof(Attributes) == 0x4, "Attributes is an invalid size");
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -39,13 +39,13 @@ public:
        union {
            u32_le raw{};

-            BitField<0, 1, u32> pro_controller;
-            BitField<1, 1, u32> handheld;
-            BitField<2, 1, u32> joycon_dual;
-            BitField<3, 1, u32> joycon_left;
-            BitField<4, 1, u32> joycon_right;
+            BitField<0, 1, u32_le> pro_controller;
+            BitField<1, 1, u32_le> handheld;
+            BitField<2, 1, u32_le> joycon_dual;
+            BitField<3, 1, u32_le> joycon_left;
+            BitField<4, 1, u32_le> joycon_right;

-            BitField<6, 1, u32> pokeball; // TODO(ogniK): Confirm when possible
+            BitField<6, 1, u32_le> pokeball; // TODO(ogniK): Confirm when possible
        };
    };
    static_assert(sizeof(NPadType) == 4, "NPadType is an invalid size");
@@ -150,43 +150,43 @@ private:
        union {
            u64_le raw{};
            // Button states
-            BitField<0, 1, u64> a;
-            BitField<1, 1, u64> b;
-            BitField<2, 1, u64> x;
-            BitField<3, 1, u64> y;
-            BitField<4, 1, u64> l_stick;
-            BitField<5, 1, u64> r_stick;
-            BitField<6, 1, u64> l;
-            BitField<7, 1, u64> r;
-            BitField<8, 1, u64> zl;
-            BitField<9, 1, u64> zr;
-            BitField<10, 1, u64> plus;
-            BitField<11, 1, u64> minus;
+            BitField<0, 1, u64_le> a;
+            BitField<1, 1, u64_le> b;
+            BitField<2, 1, u64_le> x;
+            BitField<3, 1, u64_le> y;
+            BitField<4, 1, u64_le> l_stick;
+            BitField<5, 1, u64_le> r_stick;
+            BitField<6, 1, u64_le> l;
+            BitField<7, 1, u64_le> r;
+            BitField<8, 1, u64_le> zl;
+            BitField<9, 1, u64_le> zr;
+            BitField<10, 1, u64_le> plus;
+            BitField<11, 1, u64_le> minus;

            // D-Pad
-            BitField<12, 1, u64> d_left;
-            BitField<13, 1, u64> d_up;
-            BitField<14, 1, u64> d_right;
-            BitField<15, 1, u64> d_down;
+            BitField<12, 1, u64_le> d_left;
+            BitField<13, 1, u64_le> d_up;
+            BitField<14, 1, u64_le> d_right;
+            BitField<15, 1, u64_le> d_down;

            // Left JoyStick
-            BitField<16, 1, u64> l_stick_left;
-            BitField<17, 1, u64> l_stick_up;
-            BitField<18, 1, u64> l_stick_right;
-            BitField<19, 1, u64> l_stick_down;
+            BitField<16, 1, u64_le> l_stick_left;
+            BitField<17, 1, u64_le> l_stick_up;
+            BitField<18, 1, u64_le> l_stick_right;
+            BitField<19, 1, u64_le> l_stick_down;

            // Right JoyStick
-            BitField<20, 1, u64> r_stick_left;
-            BitField<21, 1, u64> r_stick_up;
-            BitField<22, 1, u64> r_stick_right;
-            BitField<23, 1, u64> r_stick_down;
+            BitField<20, 1, u64_le> r_stick_left;
+            BitField<21, 1, u64_le> r_stick_up;
+            BitField<22, 1, u64_le> r_stick_right;
+            BitField<23, 1, u64_le> r_stick_down;

            // Not always active?
-            BitField<24, 1, u64> left_sl;
-            BitField<25, 1, u64> left_sr;
+            BitField<24, 1, u64_le> left_sl;
+            BitField<25, 1, u64_le> left_sr;

-            BitField<26, 1, u64> right_sl;
-            BitField<27, 1, u64> right_sr;
+            BitField<26, 1, u64_le> right_sl;
+            BitField<27, 1, u64_le> right_sr;
        };
    };
    static_assert(sizeof(ControllerPadState) == 8, "ControllerPadState is an invalid size");
@@ -200,12 +200,12 @@ private:
    struct ConnectionState {
+        // Connection flags: six single-bit fields (bits 0-5) that share storage with the
+        // 32-bit word `raw`, which is zero-initialized by default.
        union {
            u32_le raw{};
-            BitField<0, 1, u32> IsConnected;
-            BitField<1, 1, u32> IsWired;
-            BitField<2, 1, u32> IsLeftJoyConnected;
-            BitField<3, 1, u32> IsLeftJoyWired;
-            BitField<4, 1, u32> IsRightJoyConnected;
-            BitField<5, 1, u32> IsRightJoyWired;
+            BitField<0, 1, u32_le> IsConnected;
+            BitField<1, 1, u32_le> IsWired;
+            BitField<2, 1, u32_le> IsLeftJoyConnected;
+            BitField<3, 1, u32_le> IsLeftJoyWired;
+            BitField<4, 1, u32_le> IsRightJoyConnected;
+            BitField<5, 1, u32_le> IsRightJoyWired;
        };
    };
    static_assert(sizeof(ConnectionState) == 4, "ConnectionState is an invalid size");
@@ -240,23 +240,23 @@ private:
    struct NPadProperties {
+        // Single-bit flags at bits 11-14 that share storage with the 64-bit word `raw`.
+        // NOTE(review): the fields use a signed storage type; if BitField sign-extends signed
+        // fields, a set 1-bit flag reads back as -1 rather than 1 - confirm callers only test
+        // these flags for non-zero.
        union {
            s64_le raw{};
-            BitField<11, 1, s64> is_vertical;
-            BitField<12, 1, s64> is_horizontal;
-            BitField<13, 1, s64> use_plus;
-            BitField<14, 1, s64> use_minus;
+            BitField<11, 1, s64_le> is_vertical;
+            BitField<12, 1, s64_le> is_horizontal;
+            BitField<13, 1, s64_le> use_plus;
+            BitField<14, 1, s64_le> use_minus;
        };
    };

    struct NPadDevice {
+        // Single-bit flags at bits 0-6 that share storage with the 32-bit word `raw`.
+        // NOTE(review): `raw` is unsigned (u32_le) while the fields use a signed storage type;
+        // if BitField sign-extends signed fields, a set 1-bit flag reads back as -1 rather than
+        // 1 - confirm this mismatch is intentional and callers only test for non-zero.
        union {
            u32_le raw{};
-            BitField<0, 1, s32> pro_controller;
-            BitField<1, 1, s32> handheld;
-            BitField<2, 1, s32> handheld_left;
-            BitField<3, 1, s32> handheld_right;
-            BitField<4, 1, s32> joycon_left;
-            BitField<5, 1, s32> joycon_right;
-            BitField<6, 1, s32> pokeball;
+            BitField<0, 1, s32_le> pro_controller;
+            BitField<1, 1, s32_le> handheld;
+            BitField<2, 1, s32_le> handheld_left;
+            BitField<3, 1, s32_le> handheld_right;
+            BitField<4, 1, s32_le> joycon_left;
+            BitField<5, 1, s32_le> joycon_right;
+            BitField<6, 1, s32_le> pokeball;
        };
    };

--- a/src/core/hle/service/hid/controllers/touchscreen.h
+++ b/src/core/hle/service/hid/controllers/touchscreen.h
@@ -33,8 +33,8 @@ private:
    struct Attributes {
+        // Two single-bit flags (bits 0 and 1) that share storage with the 32-bit word `raw`.
+        // NOTE(review): `raw` is declared as plain u32 while the fields use u32_le - confirm
+        // whether `raw` should be u32_le as well for consistency.
        union {
            u32 raw{};
-            BitField<0, 1, u32> start_touch;
-            BitField<1, 1, u32> end_touch;
+            BitField<0, 1, u32_le> start_touch;
+            BitField<1, 1, u32_le> end_touch;
        };
    };
    static_assert(sizeof(Attributes) == 0x4, "Attributes is an invalid size");
--- a/src/core/hle/service/lm/lm.cpp
+++ b/src/core/hle/service/lm/lm.cpp
@@ -42,7 +42,7 @@ private:
        union {
            BitField<0, 16, Flags> flags;
            BitField<16, 8, Severity> severity;
-            BitField<24, 8, u32> verbosity;
+            BitField<24, 8, u32_le> verbosity;
        };
        u32_le payload_size;

--- a/src/core/hle/service/nvdrv/devices/nvdevice.h
+++ b/src/core/hle/service/nvdrv/devices/nvdevice.h
@@ -19,11 +19,11 @@ public:
    virtual ~nvdevice() = default;
    union Ioctl {
+        // Bit layout of the 32-bit word `raw`: cmd in bits 0-7, group in bits 8-15, length in
+        // bits 16-29, is_in in bit 30 and is_out in bit 31. `raw` has no default initializer.
        u32_le raw;
-        BitField<0, 8, u32> cmd;
-        BitField<8, 8, u32> group;
-        BitField<16, 14, u32> length;
-        BitField<30, 1, u32> is_in;
-        BitField<31, 1, u32> is_out;
+        BitField<0, 8, u32_le> cmd;
+        BitField<8, 8, u32_le> group;
+        BitField<16, 14, u32_le> length;
+        BitField<30, 1, u32_le> is_in;
+        BitField<31, 1, u32_le> is_out;
    };

    /**
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -25,9 +25,9 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
                        u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
                        const MathUtil::Rectangle<int>& crop_rect) {
    VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
-    LOG_WARNING(Service,
-                "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
-                addr, offset, width, height, stride, format);
+    LOG_TRACE(Service,
+              "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
+              addr, offset, width, height, stride, format);

    using PixelFormat = Tegra::FramebufferConfig::PixelFormat;
    const Tegra::FramebufferConfig framebuffer{
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -46,7 +46,7 @@ void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
    nvdrv = std::move(instance);
 }

-u64 NVFlinger::OpenDisplay(std::string_view name) {
+std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
    LOG_DEBUG(Service, "Opening \"{}\" display", name);

    // TODO(Subv): Currently we only support the Default display.
@@ -54,32 +54,48 @@ u64 NVFlinger::OpenDisplay(std::string_view name) {

    const auto itr = std::find_if(displays.begin(), displays.end(),
                                  [&](const Display& display) { return display.name == name; });
-
-    ASSERT(itr != displays.end());
+    if (itr == displays.end()) {
+        return {};
+    }

    return itr->id;
 }

-u64 NVFlinger::CreateLayer(u64 display_id) {
-    auto& display = FindDisplay(display_id);
+// Creates a new layer, together with a new buffer queue, on the display that has the given ID
+// and returns the new layer's ID. Returns an empty optional when FindDisplay finds no display
+// with that ID.
+std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
+    auto* const display = FindDisplay(display_id);

-    ASSERT_MSG(display.layers.empty(), "Only one layer is supported per display at the moment");
+    // Unknown display: report the failure to the caller instead of asserting.
+    if (display == nullptr) {
+        return {};
+    }
+
+    ASSERT_MSG(display->layers.empty(), "Only one layer is supported per display at the moment");

    const u64 layer_id = next_layer_id++;
    const u32 buffer_queue_id = next_buffer_queue_id++;
    auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id);
-    display.layers.emplace_back(layer_id, buffer_queue);
+    // The layer receives a copy of the shared_ptr; the original is then moved into
+    // buffer_queues, so both containers refer to the same queue.
+    display->layers.emplace_back(layer_id, buffer_queue);
    buffer_queues.emplace_back(std::move(buffer_queue));
    return layer_id;
 }

-u32 NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const {
-    const auto& layer = FindLayer(display_id, layer_id);
-    return layer.buffer_queue->GetId();
+// Looks up the given layer on the given display and returns the ID of its buffer queue.
+// Returns an empty optional when FindLayer cannot locate the display/layer pair.
+std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const {
+    const auto* const layer = FindLayer(display_id, layer_id);
+
+    if (layer == nullptr) {
+        return {};
+    }
+
+    return layer->buffer_queue->GetId();
 }

-Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::GetVsyncEvent(u64 display_id) {
-    return FindDisplay(display_id).vsync_event.readable;
+// Returns the readable side of the vsync event of the display with the given ID, or nullptr
+// when FindDisplay finds no display with that ID.
+Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const {
+    auto* const display = FindDisplay(display_id);
+
+    if (display == nullptr) {
+        return nullptr;
+    }
+
+    return display->vsync_event.readable;
 }

 std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const {
@@ -90,40 +106,60 @@ std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const {
    return *itr;
 }

-Display& NVFlinger::FindDisplay(u64 display_id) {
+// Searches `displays` for the entry whose id equals display_id.
+// Returns a pointer to that entry, or nullptr when there is no match.
+Display* NVFlinger::FindDisplay(u64 display_id) {
    const auto itr = std::find_if(displays.begin(), displays.end(),
                                  [&](const Display& display) { return display.id == display_id; });

-    ASSERT(itr != displays.end());
-    return *itr;
+    if (itr == displays.end()) {
+        return nullptr;
+    }
+
+    return &*itr;
 }

-const Display& NVFlinger::FindDisplay(u64 display_id) const {
+// Const overload: searches `displays` for the entry whose id equals display_id.
+// Returns a pointer to that entry, or nullptr when there is no match.
+const Display* NVFlinger::FindDisplay(u64 display_id) const {
    const auto itr = std::find_if(displays.begin(), displays.end(),
                                  [&](const Display& display) { return display.id == display_id; });

-    ASSERT(itr != displays.end());
-    return *itr;
+    if (itr == displays.end()) {
+        return nullptr;
+    }
+
+    return &*itr;
 }

-Layer& NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
-    auto& display = FindDisplay(display_id);
+// Resolves the display via FindDisplay, then searches its `layers` for the entry whose id
+// equals layer_id. Returns nullptr when either the display or the layer is not found.
+Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
+    auto* const display = FindDisplay(display_id);

-    const auto itr = std::find_if(display.layers.begin(), display.layers.end(),
+    if (display == nullptr) {
+        return nullptr;
+    }
+
+    const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
                                  [&](const Layer& layer) { return layer.id == layer_id; });

-    ASSERT(itr != display.layers.end());
-    return *itr;
+    if (itr == display->layers.end()) {
+        return nullptr;
+    }
+
+    return &*itr;
 }

-const Layer& NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
-    const auto& display = FindDisplay(display_id);
+// Const overload: resolves the display via FindDisplay, then searches its `layers` for the
+// entry whose id equals layer_id. Returns nullptr when either lookup fails.
+const Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
+    const auto* const display = FindDisplay(display_id);

-    const auto itr = std::find_if(display.layers.begin(), display.layers.end(),
+    if (display == nullptr) {
+        return nullptr;
+    }
+
+    const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
                                  [&](const Layer& layer) { return layer.id == layer_id; });

-    ASSERT(itr != display.layers.end());
-    return *itr;
+    if (itr == display->layers.end()) {
+        return nullptr;
+    }
+
+    return &*itr;
 }

 void NVFlinger::Compose() {
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -6,6 +6,7 @@

 #include <array>
 #include <memory>
+#include <optional>
 #include <string>
 #include <string_view>
 #include <vector>
@@ -58,16 +59,24 @@ public:
    void SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance);

    /// Opens the specified display and returns the ID.
-    u64 OpenDisplay(std::string_view name);
+    ///
+    /// If an invalid display name is provided, then an empty optional is returned.
+    std::optional<u64> OpenDisplay(std::string_view name);

    /// Creates a layer on the specified display and returns the layer ID.
-    u64 CreateLayer(u64 display_id);
+    ///
+    /// If an invalid display ID is specified, then an empty optional is returned.
+    std::optional<u64> CreateLayer(u64 display_id);

    /// Finds the buffer queue ID of the specified layer in the specified display.
-    u32 FindBufferQueueId(u64 display_id, u64 layer_id) const;
+    ///
+    /// If an invalid display ID or layer ID is provided, then an empty optional is returned.
+    std::optional<u32> FindBufferQueueId(u64 display_id, u64 layer_id) const;

    /// Gets the vsync event for the specified display.
-    Kernel::SharedPtr<Kernel::ReadableEvent> GetVsyncEvent(u64 display_id);
+    ///
+    /// If an invalid display ID is provided, then nullptr is returned.
+    Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const;

    /// Obtains a buffer queue identified by the ID.
    std::shared_ptr<BufferQueue> FindBufferQueue(u32 id) const;
@@ -78,16 +87,16 @@ public:

 private:
    /// Finds the display identified by the specified ID.
-    Display& FindDisplay(u64 display_id);
+    ///
+    /// If no display has the specified ID, then nullptr is returned.
+    Display* FindDisplay(u64 display_id);

    /// Finds the display identified by the specified ID.
-    const Display& FindDisplay(u64 display_id) const;
+    ///
+    /// If no display has the specified ID, then nullptr is returned.
+    const Display* FindDisplay(u64 display_id) const;

    /// Finds the layer identified by the specified ID in the desired display.
-    Layer& FindLayer(u64 display_id, u64 layer_id);
+    ///
+    /// If the display or the layer cannot be found, then nullptr is returned.
+    Layer* FindLayer(u64 display_id, u64 layer_id);

    /// Finds the layer identified by the specified ID in the desired display.
-    const Layer& FindLayer(u64 display_id, u64 layer_id) const;
+    ///
+    /// If the display or the layer cannot be found, then nullptr is returned.
+    const Layer* FindLayer(u64 display_id, u64 layer_id) const;

    std::shared_ptr<Nvidia::Module> nvdrv;

--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -34,6 +34,7 @@ namespace Service::VI {

 constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1};
 constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6};
+constexpr ResultCode ERR_NOT_FOUND{ErrorModule::VI, 7};

 struct DisplayInfo {
    /// The name of this particular display.
@@ -838,11 +839,16 @@ private:
                    "(STUBBED) called. unknown=0x{:08X}, display=0x{:016X}, aruid=0x{:016X}",
                    unknown, display, aruid);

-        const u64 layer_id = nv_flinger->CreateLayer(display);
+        const auto layer_id = nv_flinger->CreateLayer(display);
+        if (!layer_id) {
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(ERR_NOT_FOUND);
+            return;
+        }

        IPC::ResponseBuilder rb{ctx, 4};
        rb.Push(RESULT_SUCCESS);
-        rb.Push(layer_id);
+        rb.Push(*layer_id);
    }

    void AddToLayerStack(Kernel::HLERequestContext& ctx) {
@@ -950,9 +956,16 @@ private:

        ASSERT_MSG(name == "Default", "Non-default displays aren't supported yet");

+        const auto display_id = nv_flinger->OpenDisplay(name);
+        if (!display_id) {
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(ERR_NOT_FOUND);
+            return;
+        }
+
        IPC::ResponseBuilder rb{ctx, 4};
        rb.Push(RESULT_SUCCESS);
-        rb.Push<u64>(nv_flinger->OpenDisplay(name));
+        rb.Push<u64>(*display_id);
    }

    void CloseDisplay(Kernel::HLERequestContext& ctx) {
@@ -1043,10 +1056,21 @@ private:

        LOG_DEBUG(Service_VI, "called. layer_id=0x{:016X}, aruid=0x{:016X}", layer_id, aruid);

-        const u64 display_id = nv_flinger->OpenDisplay(display_name);
-        const u32 buffer_queue_id = nv_flinger->FindBufferQueueId(display_id, layer_id);
+        const auto display_id = nv_flinger->OpenDisplay(display_name);
+        if (!display_id) {
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(ERR_NOT_FOUND);
+            return;
+        }

-        NativeWindow native_window{buffer_queue_id};
+        const auto buffer_queue_id = nv_flinger->FindBufferQueueId(*display_id, layer_id);
+        if (!buffer_queue_id) {
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(ERR_NOT_FOUND);
+            return;
+        }
+
+        NativeWindow native_window{*buffer_queue_id};
        IPC::ResponseBuilder rb{ctx, 4};
        rb.Push(RESULT_SUCCESS);
        rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
@@ -1062,13 +1086,24 @@ private:

        // TODO(Subv): What's the difference between a Stray and a Managed layer?

-        const u64 layer_id = nv_flinger->CreateLayer(display_id);
-        const u32 buffer_queue_id = nv_flinger->FindBufferQueueId(display_id, layer_id);
+        const auto layer_id = nv_flinger->CreateLayer(display_id);
+        if (!layer_id) {
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(ERR_NOT_FOUND);
+            return;
+        }

-        NativeWindow native_window{buffer_queue_id};
+        const auto buffer_queue_id = nv_flinger->FindBufferQueueId(display_id, *layer_id);
+        if (!buffer_queue_id) {
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(ERR_NOT_FOUND);
+            return;
+        }
+
+        NativeWindow native_window{*buffer_queue_id};
        IPC::ResponseBuilder rb{ctx, 6};
        rb.Push(RESULT_SUCCESS);
-        rb.Push(layer_id);
+        rb.Push(*layer_id);
        rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
    }

@@ -1088,7 +1123,12 @@ private:

        LOG_WARNING(Service_VI, "(STUBBED) called. display_id=0x{:016X}", display_id);

-        const auto vsync_event = nv_flinger->GetVsyncEvent(display_id);
+        const auto vsync_event = nv_flinger->FindVsyncEvent(display_id);
+        if (!vsync_event) {
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(ERR_NOT_FOUND);
+            return;
+        }

        IPC::ResponseBuilder rb{ctx, 2, 1};
        rb.Push(RESULT_SUCCESS);
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -391,6 +391,7 @@ struct Values {
    float resolution_factor;
    bool use_frame_limit;
    u16 frame_limit;
+    bool use_disk_shader_cache;
    bool use_accurate_gpu_emulation;

    float bg_red;
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -158,6 +158,8 @@ TelemetrySession::TelemetrySession() {
    AddField(Telemetry::FieldType::UserConfig, "Renderer_UseFrameLimit",
             Settings::values.use_frame_limit);
    AddField(Telemetry::FieldType::UserConfig, "Renderer_FrameLimit", Settings::values.frame_limit);
+    AddField(Telemetry::FieldType::UserConfig, "Renderer_UseDiskShaderCache",
+             Settings::values.use_disk_shader_cache);
    AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
             Settings::values.use_accurate_gpu_emulation);
    AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -1,5 +1,4 @@
 add_executable(tests
-    common/bit_field.cpp
    common/param_package.cpp
    common/ring_buffer.cpp
    core/arm/arm_test_common.cpp
--- a/src/tests/common/bit_field.cpp
+++ b/src/tests/common/bit_field.cpp
@@ -1,90 +0,0 @@
-// Copyright 2019 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <array>
-#include <cstring>
-#include <type_traits>
-#include <catch2/catch.hpp>
-#include "common/bit_field.h"
-
-TEST_CASE("BitField", "[common]") {
-    enum class TestEnum : u32 {
-        A = 0b10111101,
-        B = 0b10101110,
-        C = 0b00001111,
-    };
-
-    union LEBitField {
-        u32_le raw;
-        BitField<0, 6, u32> a;
-        BitField<6, 4, s32> b;
-        BitField<10, 8, TestEnum> c;
-        BitField<18, 14, u32> d;
-    } le_bitfield;
-
-    union BEBitField {
-        u32_be raw;
-        BitFieldBE<0, 6, u32> a;
-        BitFieldBE<6, 4, s32> b;
-        BitFieldBE<10, 8, TestEnum> c;
-        BitFieldBE<18, 14, u32> d;
-    } be_bitfield;
-
-    static_assert(sizeof(LEBitField) == sizeof(u32));
-    static_assert(sizeof(BEBitField) == sizeof(u32));
-    static_assert(std::is_trivially_copyable_v<LEBitField>);
-    static_assert(std::is_trivially_copyable_v<BEBitField>);
-
-    std::array<u8, 4> raw{{
-        0b01101100,
-        0b11110110,
-        0b10111010,
-        0b11101100,
-    }};
-
-    std::memcpy(&le_bitfield, &raw, sizeof(raw));
-    std::memcpy(&be_bitfield, &raw, sizeof(raw));
-
-    // bit fields: 11101100101110'10111101'1001'101100
-    REQUIRE(le_bitfield.raw == 0b11101100'10111010'11110110'01101100);
-    REQUIRE(le_bitfield.a == 0b101100);
-    REQUIRE(le_bitfield.b == -7); // 1001 as two's complement
-    REQUIRE(le_bitfield.c == TestEnum::A);
-    REQUIRE(le_bitfield.d == 0b11101100101110);
-
-    le_bitfield.a.Assign(0b000111);
-    le_bitfield.b.Assign(-1);
-    le_bitfield.c.Assign(TestEnum::C);
-    le_bitfield.d.Assign(0b01010101010101);
-    std::memcpy(&raw, &le_bitfield, sizeof(raw));
-    // bit fields: 01010101010101'00001111'1111'000111
-    REQUIRE(le_bitfield.raw == 0b01010101'01010100'00111111'11000111);
-    REQUIRE(raw == std::array<u8, 4>{{
-                       0b11000111,
-                       0b00111111,
-                       0b01010100,
-                       0b01010101,
-                   }});
-
-    // bit fields: 01101100111101'10101110'1011'101100
-    REQUIRE(be_bitfield.raw == 0b01101100'11110110'10111010'11101100);
-    REQUIRE(be_bitfield.a == 0b101100);
-    REQUIRE(be_bitfield.b == -5); // 1011 as two's complement
-    REQUIRE(be_bitfield.c == TestEnum::B);
-    REQUIRE(be_bitfield.d == 0b01101100111101);
-
-    be_bitfield.a.Assign(0b000111);
-    be_bitfield.b.Assign(-1);
-    be_bitfield.c.Assign(TestEnum::C);
-    be_bitfield.d.Assign(0b01010101010101);
-    std::memcpy(&raw, &be_bitfield, sizeof(raw));
-    // bit fields: 01010101010101'00001111'1111'000111
-    REQUIRE(be_bitfield.raw == 0b01010101'01010100'00111111'11000111);
-    REQUIRE(raw == std::array<u8, 4>{{
-                       0b01010101,
-                       0b01010100,
-                       0b00111111,
-                       0b11000111,
-                   }});
-}
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -44,6 +44,8 @@ add_library(video_core STATIC
    renderer_opengl/gl_shader_cache.h
    renderer_opengl/gl_shader_decompiler.cpp
    renderer_opengl/gl_shader_decompiler.h
+    renderer_opengl/gl_shader_disk_cache.cpp
+    renderer_opengl/gl_shader_disk_cache.h
    renderer_opengl/gl_shader_gen.cpp
    renderer_opengl/gl_shader_gen.h
    renderer_opengl/gl_shader_manager.cpp
@@ -102,4 +104,4 @@ add_library(video_core STATIC
 create_target_directory_groups(video_core)

 target_link_libraries(video_core PUBLIC common core)
-target_link_libraries(video_core PRIVATE glad)
+target_link_libraries(video_core PRIVATE glad lz4_static)
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -35,8 +35,10 @@ void DmaPusher::DispatchCalls() {
 bool DmaPusher::Step() {
    if (dma_get != dma_put) {
        // Push buffer non-empty, read a word
-        const CommandHeader command_header{
-            Memory::Read32(*gpu.MemoryManager().GpuToCpuAddress(dma_get))};
+        const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
+        ASSERT_MSG(address, "Invalid GPU address");
+
+        const CommandHeader command_header{Memory::Read32(*address)};

        dma_get += sizeof(u32);

--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -21,7 +21,9 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
    regs.reg_array[method_call.method] = method_call.argument;

    switch (method_call.method) {
-    case FERMI2D_REG_INDEX(trigger): {
+    // Trigger the surface copy on the last register write. The last register is blit_src_y, which
+    // is 64 bits wide, so trigger on the write to its second 32-bit word.
+    case FERMI2D_REG_INDEX(blit_src_y) + 1: {
        HandleSurfaceCopy();
        break;
    }
@@ -32,55 +34,23 @@ void Fermi2D::HandleSurfaceCopy() {
    LOG_WARNING(HW_GPU, "Requested a surface copy with operation {}",
                static_cast<u32>(regs.operation));

-    const GPUVAddr source = regs.src.Address();
-    const GPUVAddr dest = regs.dst.Address();
-
-    // TODO(Subv): Only same-format and same-size copies are allowed for now.
-    ASSERT(regs.src.format == regs.dst.format);
-    ASSERT(regs.src.width * regs.src.height == regs.dst.width * regs.dst.height);
-
    // TODO(Subv): Only raw copies are implemented.
    ASSERT(regs.operation == Regs::Operation::SrcCopy);

-    const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source);
-    const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest);
+    // Source rectangle: only the upper 32 bits of the 64-bit blit_src_* values are used as
+    // coordinates. The far corner is the start value advanced by blit_du_dx / blit_dv_dy once
+    // per destination column / row. NOTE(review): this looks like 32.32 fixed point - confirm.
+    const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)};
+    const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)};
+    const u32 src_blit_x2{
+        static_cast<u32>((regs.blit_src_x + (regs.blit_dst_width * regs.blit_du_dx)) >> 32)};
+    const u32 src_blit_y2{
+        static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)};

-    u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format);
-    u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);
+    // Destination rectangle: origin plus width/height, taken directly from the registers.
+    const MathUtil::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
+    const MathUtil::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
+                                            regs.blit_dst_x + regs.blit_dst_width,
+                                            regs.blit_dst_y + regs.blit_dst_height};

-    if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) {
-        // All copies here update the main memory, so mark all rasterizer states as invalid.
-        Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
-
-        rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height);
-        // We have to invalidate the destination region to evict any outdated surfaces from the
-        // cache. We do this before actually writing the new data because the destination address
-        // might contain a dirty surface that will have to be written back to memory.
-        rasterizer.InvalidateRegion(dest_cpu,
-                                    dst_bytes_per_pixel * regs.dst.width * regs.dst.height);
-
-        if (regs.src.linear == regs.dst.linear) {
-            // If the input layout and the output layout are the same, just perform a raw copy.
-            ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight());
-            Memory::CopyBlock(dest_cpu, source_cpu,
-                              src_bytes_per_pixel * regs.dst.width * regs.dst.height);
-            return;
-        }
-        u8* src_buffer = Memory::GetPointer(source_cpu);
-        u8* dst_buffer = Memory::GetPointer(dest_cpu);
-        if (!regs.src.linear && regs.dst.linear) {
-            // If the input is tiled and the output is linear, deswizzle the input and copy it over.
-            Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
-                                      src_bytes_per_pixel, dst_bytes_per_pixel, src_buffer,
-                                      dst_buffer, true, regs.src.BlockHeight(),
-                                      regs.src.BlockDepth(), 0);
-        } else {
-            // If the input is linear and the output is tiled, swizzle the input and copy it over.
-            Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
-                                      src_bytes_per_pixel, dst_bytes_per_pixel, dst_buffer,
-                                      src_buffer, false, regs.dst.BlockHeight(),
-                                      regs.dst.BlockDepth(), 0);
-        }
+    // The software copy path is removed by this change: if the rasterizer declines the copy,
+    // the request is reported as unimplemented.
+    if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) {
+        UNIMPLEMENTED();
    }
 }

--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -94,12 +94,22 @@ public:

                Operation operation;

-                INSERT_PADDING_WORDS(0x9);
+                INSERT_PADDING_WORDS(0x177);

-                // TODO(Subv): This is only a guess.
-                u32 trigger;
+                u32 blit_control;

-                INSERT_PADDING_WORDS(0x1A3);
+                INSERT_PADDING_WORDS(0x8);
+
+                u32 blit_dst_x;
+                u32 blit_dst_y;
+                u32 blit_dst_width;
+                u32 blit_dst_height;
+                u64 blit_du_dx;
+                u64 blit_dv_dy;
+                u64 blit_src_x;
+                u64 blit_src_y;
+
+                INSERT_PADDING_WORDS(0x21);
            };
            std::array<u32, NUM_REGS> reg_array;
        };
@@ -122,7 +132,16 @@ private:
 ASSERT_REG_POSITION(dst, 0x80);
 ASSERT_REG_POSITION(src, 0x8C);
 ASSERT_REG_POSITION(operation, 0xAB);
-ASSERT_REG_POSITION(trigger, 0xB5);
+ASSERT_REG_POSITION(blit_control, 0x223);
+ASSERT_REG_POSITION(blit_dst_x, 0x22c);
+ASSERT_REG_POSITION(blit_dst_y, 0x22d);
+ASSERT_REG_POSITION(blit_dst_width, 0x22e);
+ASSERT_REG_POSITION(blit_dst_height, 0x22f);
+ASSERT_REG_POSITION(blit_du_dx, 0x230);
+ASSERT_REG_POSITION(blit_dv_dy, 0x232);
+ASSERT_REG_POSITION(blit_src_x, 0x234);
+ASSERT_REG_POSITION(blit_src_y, 0x236);
+
 #undef ASSERT_REG_POSITION

 } // namespace Tegra::Engines
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -39,16 +39,17 @@ void KeplerMemory::ProcessData(u32 data) {
    ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
    ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);

-    GPUVAddr address = regs.dest.Address();
-    VAddr dest_address =
-        *memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
+    const GPUVAddr address = regs.dest.Address();
+    const auto dest_address =
+        memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
+    ASSERT_MSG(dest_address, "Invalid GPU address");

    // We have to invalidate the destination region to evict any outdated surfaces from the cache.
    // We do this before actually writing the new data because the destination address might contain
    // a dirty surface that will have to be written back to memory.
-    rasterizer.InvalidateRegion(dest_address, sizeof(u32));
+    rasterizer.InvalidateRegion(*dest_address, sizeof(u32));

-    Memory::Write32(dest_address, data);
+    Memory::Write32(*dest_address, data);
    Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();

    state.write_offset++;
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -273,7 +273,8 @@ void Maxwell3D::ProcessQueryGet() {
    GPUVAddr sequence_address = regs.query.QueryAddress();
    // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
    // VAddr before writing.
-    std::optional<VAddr> address = memory_manager.GpuToCpuAddress(sequence_address);
+    const auto address = memory_manager.GpuToCpuAddress(sequence_address);
+    ASSERT_MSG(address, "Invalid GPU address");

    // TODO(Subv): Support the other query units.
    ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -386,14 +387,14 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {

 void Maxwell3D::ProcessCBData(u32 value) {
    // Write the input value to the current const buffer at the current position.
-    GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
+    const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
    ASSERT(buffer_address != 0);

    // Don't allow writing past the end of the buffer.
    ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);

-    std::optional<VAddr> address =
-        memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
+    const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
+    ASSERT_MSG(address, "Invalid GPU address");

    Memory::Write32(*address, value);
    dirty_flags.OnMemoryWrite();
@@ -403,10 +404,11 @@ void Maxwell3D::ProcessCBData(u32 value) {
 }

 Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
-    GPUVAddr tic_base_address = regs.tic.TICAddress();
+    const GPUVAddr tic_base_address = regs.tic.TICAddress();

-    GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
-    std::optional<VAddr> tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
+    const GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
+    const auto tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
+    ASSERT_MSG(tic_address_cpu, "Invalid GPU address");

    Texture::TICEntry tic_entry;
    Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
@@ -415,10 +417,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
                   tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
               "TIC versions other than BlockLinear or Pitch are unimplemented");

-    auto r_type = tic_entry.r_type.Value();
-    auto g_type = tic_entry.g_type.Value();
-    auto b_type = tic_entry.b_type.Value();
-    auto a_type = tic_entry.a_type.Value();
+    const auto r_type = tic_entry.r_type.Value();
+    const auto g_type = tic_entry.g_type.Value();
+    const auto b_type = tic_entry.b_type.Value();
+    const auto a_type = tic_entry.a_type.Value();

    // TODO(Subv): Different data types for separate components are not supported
    ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
@@ -427,10 +429,11 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
 }

 Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
-    GPUVAddr tsc_base_address = regs.tsc.TSCAddress();
+    const GPUVAddr tsc_base_address = regs.tsc.TSCAddress();

-    GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
-    std::optional<VAddr> tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
+    const GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
+    const auto tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
+    ASSERT_MSG(tsc_address_cpu, "Invalid GPU address");

    Texture::TSCEntry tsc_entry;
    Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry));
@@ -452,8 +455,10 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
    for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
         current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {

-        Texture::TextureHandle tex_handle{
-            Memory::Read32(*memory_manager.GpuToCpuAddress(current_texture))};
+        const auto address = memory_manager.GpuToCpuAddress(current_texture);
+        ASSERT_MSG(address, "Invalid GPU address");
+
+        const Texture::TextureHandle tex_handle{Memory::Read32(*address)};

        Texture::FullTextureInfo tex_info{};
        // TODO(Subv): Use the shader to determine which textures are actually accessed.
@@ -462,23 +467,16 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
            sizeof(Texture::TextureHandle);

        // Load the TIC data.
-        if (tex_handle.tic_id != 0) {
-            tex_info.enabled = true;
-
-            auto tic_entry = GetTICEntry(tex_handle.tic_id);
-            // TODO(Subv): Workaround for BitField's move constructor being deleted.
-            std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
-        }
+        auto tic_entry = GetTICEntry(tex_handle.tic_id);
+        // TODO(Subv): Workaround for BitField's move constructor being deleted.
+        std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));

        // Load the TSC data
-        if (tex_handle.tsc_id != 0) {
-            auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
-            // TODO(Subv): Workaround for BitField's move constructor being deleted.
-            std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
-        }
+        auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
+        // TODO(Subv): Workaround for BitField's move constructor being deleted.
+        std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));

-        if (tex_info.enabled)
-            textures.push_back(tex_info);
+        textures.push_back(tex_info);
    }

    return textures;
@@ -490,31 +488,28 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
    auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
    ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);

-    GPUVAddr tex_info_address = tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
+    const GPUVAddr tex_info_address =
+        tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);

    ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);

-    std::optional<VAddr> tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
-    Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
+    const auto tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
+    ASSERT_MSG(tex_address_cpu, "Invalid GPU address");
+
+    const Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};

    Texture::FullTextureInfo tex_info{};
    tex_info.index = static_cast<u32>(offset);

    // Load the TIC data.
-    if (tex_handle.tic_id != 0) {
-        tex_info.enabled = true;
-
-        auto tic_entry = GetTICEntry(tex_handle.tic_id);
-        // TODO(Subv): Workaround for BitField's move constructor being deleted.
-        std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
-    }
+    auto tic_entry = GetTICEntry(tex_handle.tic_id);
+    // TODO(Subv): Workaround for BitField's move constructor being deleted.
+    std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));

    // Load the TSC data
-    if (tex_handle.tsc_id != 0) {
-        auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
-        // TODO(Subv): Workaround for BitField's move constructor being deleted.
-        std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
-    }
+    auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
+    // TODO(Subv): Workaround for BitField's move constructor being deleted.
+    std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));

    return tex_info;
 }
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -39,8 +39,10 @@ void MaxwellDMA::HandleCopy() {
    const GPUVAddr source = regs.src_address.Address();
    const GPUVAddr dest = regs.dst_address.Address();

-    const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source);
-    const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest);
+    const auto source_cpu = memory_manager.GpuToCpuAddress(source);
+    const auto dest_cpu = memory_manager.GpuToCpuAddress(dest);
+    ASSERT_MSG(source_cpu, "Invalid source GPU address");
+    ASSERT_MSG(dest_cpu, "Invalid destination GPU address");

    // TODO(Subv): Perform more research and implement all features of this engine.
    ASSERT(regs.exec.enable_swizzle == 0);
@@ -64,7 +66,7 @@ void MaxwellDMA::HandleCopy() {
        // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
        // y_count).
        if (!regs.exec.enable_2d) {
-            Memory::CopyBlock(dest_cpu, source_cpu, regs.x_count);
+            Memory::CopyBlock(*dest_cpu, *source_cpu, regs.x_count);
            return;
        }

@@ -73,8 +75,8 @@ void MaxwellDMA::HandleCopy() {
        // rectangle. There is no need to manually flush/invalidate the regions because
        // CopyBlock does that for us.
        for (u32 line = 0; line < regs.y_count; ++line) {
-            const VAddr source_line = source_cpu + line * regs.src_pitch;
-            const VAddr dest_line = dest_cpu + line * regs.dst_pitch;
+            const VAddr source_line = *source_cpu + line * regs.src_pitch;
+            const VAddr dest_line = *dest_cpu + line * regs.dst_pitch;
            Memory::CopyBlock(dest_line, source_line, regs.x_count);
        }
        return;
@@ -87,12 +89,12 @@ void MaxwellDMA::HandleCopy() {
    const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
        // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
        // copying.
-        rasterizer.FlushRegion(source_cpu, src_size);
+        rasterizer.FlushRegion(*source_cpu, src_size);

        // We have to invalidate the destination region to evict any outdated surfaces from the
        // cache. We do this before actually writing the new data because the destination address
        // might contain a dirty surface that will have to be written back to memory.
-        rasterizer.InvalidateRegion(dest_cpu, dst_size);
+        rasterizer.InvalidateRegion(*dest_cpu, dst_size);
    };

    if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
@@ -105,8 +107,8 @@ void MaxwellDMA::HandleCopy() {
                           copy_size * src_bytes_per_pixel);

        Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
-                                  regs.src_params.size_x, src_bytes_per_pixel, source_cpu, dest_cpu,
-                                  regs.src_params.BlockHeight(), regs.src_params.pos_x,
+                                  regs.src_params.size_x, src_bytes_per_pixel, *source_cpu,
+                                  *dest_cpu, regs.src_params.BlockHeight(), regs.src_params.pos_x,
                                  regs.src_params.pos_y);
    } else {
        ASSERT(regs.dst_params.size_z == 1);
@@ -119,7 +121,7 @@ void MaxwellDMA::HandleCopy() {

        // If the input is linear and the output is tiled, swizzle the input and copy it over.
        Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
-                                src_bpp, dest_cpu, source_cpu, regs.dst_params.BlockHeight());
+                                src_bpp, *dest_cpu, *source_cpu, regs.dst_params.BlockHeight());
    }
 }

--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -186,7 +186,7 @@ enum class SubOp : u64 {
 };

 enum class F2iRoundingOp : u64 {
-    None = 0,
+    RoundEven = 0,
    Floor = 1,
    Ceil = 2,
    Trunc = 3,
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -154,7 +154,8 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
    const VAddr base_addr{PageSlot(gpu_addr)};

    if (base_addr == static_cast<u64>(PageStatus::Allocated) ||
-        base_addr == static_cast<u64>(PageStatus::Unmapped)) {
+        base_addr == static_cast<u64>(PageStatus::Unmapped) ||
+        base_addr == static_cast<u64>(PageStatus::Reserved)) {
        return {};
    }

--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -4,6 +4,7 @@

 #pragma once

+#include <atomic>
 #include <functional>
 #include "common/common_types.h"
 #include "video_core/engines/fermi_2d.h"
@@ -45,7 +46,9 @@ public:

    /// Attempt to use a faster method to perform a surface copy
    virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
-                                       const Tegra::Engines::Fermi2D::Regs::Surface& dst) {
+                                       const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+                                       const MathUtil::Rectangle<u32>& src_rect,
+                                       const MathUtil::Rectangle<u32>& dst_rect) {
        return false;
    }

@@ -61,5 +64,9 @@ public:

    /// Increase/decrease the number of object in pages touching the specified region
    virtual void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {}
+
+    /// Initialize disk cached resources for the game being emulated
+    virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
+                                   const DiskResourceLoadCallback& callback = {}) {}
 };
 } // namespace VideoCore
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -19,7 +19,8 @@ OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
 GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
                                      std::size_t alignment, bool cache) {
    auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
-    const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
+    const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
+    ASSERT_MSG(cpu_addr, "Invalid GPU address");

    // Cache management is a big overhead, so only cache entries with a given size.
    // TODO: Figure out which size is the best for given games.
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
@@ -46,7 +46,9 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
    auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);

    auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
-    const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
+    const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
+    ASSERT_MSG(cpu_addr, "Invalid GPU address");
+
    const u8* source{Memory::GetPointer(*cpu_addr)};

    for (u32 primitive = 0; primitive < count / 4; ++primitive) {
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -22,6 +22,7 @@
 #include "core/settings.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
+#include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_gen.h"
 #include "video_core/renderer_opengl/maxwell_to_gl.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
@@ -99,8 +100,9 @@ struct FramebufferCacheKey {
    }
 };

-RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
-    : res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info},
+RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
+                                   ScreenInfo& info)
+    : res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info},
      buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
    // Create sampler objects
    for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
@@ -447,7 +449,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
    return boost::make_iterator_range(map.equal_range(interval));
 }

-void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
+void RasterizerOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
    const u64 page_start{addr >> Memory::PAGE_BITS};
    const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};

@@ -477,6 +479,11 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
        cached_pages.add({pages_interval, delta});
 }

+void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
+                                         const VideoCore::DiskResourceLoadCallback& callback) {
+    shader_cache.LoadDiskCache(stop_loading, callback);
+}
+
 std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
    OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
    std::optional<std::size_t> single_color_target) {
@@ -771,15 +778,11 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
 }

 bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
-                                             const Tegra::Engines::Fermi2D::Regs::Surface& dst) {
+                                             const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+                                             const MathUtil::Rectangle<u32>& src_rect,
+                                             const MathUtil::Rectangle<u32>& dst_rect) {
    MICROPROFILE_SCOPE(OpenGL_Blits);
-
-    if (Settings::values.use_accurate_gpu_emulation) {
-        // Skip the accelerated copy and perform a slow but more accurate copy
-        return false;
-    }
-
-    res_cache.FermiCopySurface(src, dst);
+    res_cache.FermiCopySurface(src, dst, src_rect, dst_rect);
    return true;
 }

@@ -1004,29 +1007,20 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s

    for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
        const auto& entry = entries[bindpoint];
+        const auto texture = maxwell3d.GetStageTexture(stage, entry.GetOffset());
        const u32 current_bindpoint = base_bindings.sampler + bindpoint;
-        auto& unit = state.texture_units[current_bindpoint];
-
-        const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
-        if (!texture.enabled) {
-            unit.texture = 0;
-            continue;
-        }

        texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);

        Surface surface = res_cache.GetTextureSurface(texture, entry);
        if (surface != nullptr) {
-            unit.texture =
+            state.texture_units[current_bindpoint].texture =
                entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle;
-            unit.target = entry.IsArray() ? surface->TargetLayer() : surface->Target();
-            unit.swizzle.r = MaxwellToGL::SwizzleSource(texture.tic.x_source);
-            unit.swizzle.g = MaxwellToGL::SwizzleSource(texture.tic.y_source);
-            unit.swizzle.b = MaxwellToGL::SwizzleSource(texture.tic.z_source);
-            unit.swizzle.a = MaxwellToGL::SwizzleSource(texture.tic.w_source);
+            surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
+                                   texture.tic.w_source);
        } else {
            // Can occur when texture addr is null or its memory is unmapped/invalid
-            unit.texture = 0;
+            state.texture_units[current_bindpoint].texture = 0;
        }
    }
 }
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -5,6 +5,7 @@
 #pragma once

 #include <array>
+#include <atomic>
 #include <cstddef>
 #include <map>
 #include <memory>
@@ -33,6 +34,10 @@
 #include "video_core/renderer_opengl/gl_state.h"
 #include "video_core/renderer_opengl/gl_stream_buffer.h"

+namespace Core {
+class System;
+}
+
 namespace Core::Frontend {
 class EmuWindow;
 }
@@ -45,7 +50,8 @@ struct FramebufferCacheKey;

 class RasterizerOpenGL : public VideoCore::RasterizerInterface {
 public:
-    explicit RasterizerOpenGL(Core::Frontend::EmuWindow& renderer, ScreenInfo& info);
+    explicit RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
+                              ScreenInfo& info);
    ~RasterizerOpenGL() override;

    void DrawArrays() override;
@@ -55,11 +61,15 @@ public:
    void InvalidateRegion(VAddr addr, u64 size) override;
    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
    bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
-                               const Tegra::Engines::Fermi2D::Regs::Surface& dst) override;
+                               const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+                               const MathUtil::Rectangle<u32>& src_rect,
+                               const MathUtil::Rectangle<u32>& dst_rect) override;
    bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                           u32 pixel_stride) override;
    bool AccelerateDrawBatch(bool is_indexed) override;
    void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override;
+    void LoadDiskResources(const std::atomic_bool& stop_loading,
+                           const VideoCore::DiskResourceLoadCallback& callback) override;

    /// Maximum supported size that a constbuffer can have in bytes.
    static constexpr std::size_t MaxConstbufferSize = 0x10000;
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -18,7 +18,6 @@
 #include "video_core/morton.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
-#include "video_core/renderer_opengl/gl_state.h"
 #include "video_core/renderer_opengl/utils.h"
 #include "video_core/surface.h"
 #include "video_core/textures/astc.h"
@@ -44,14 +43,14 @@ struct FormatTuple {
    bool compressed;
 };

-static void ApplyTextureDefaults(GLenum target, u32 max_mip_level) {
-    glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-    glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
-    glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-    glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-    glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1);
+static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
+    glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+    glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+    glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+    glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1);
    if (max_mip_level == 1) {
-        glTexParameterf(target, GL_TEXTURE_LOD_BIAS, 1000.0);
+        glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0);
    }
 }

@@ -126,6 +125,9 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,

    params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
    params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
+    if (!params.is_tiled) {
+        params.pitch = config.tic.Pitch();
+    }
    params.unaligned_height = config.tic.Height();
    params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
    params.identity = SurfaceClass::Uploaded;
@@ -192,7 +194,13 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
                             config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
    params.component_type = ComponentTypeFromRenderTarget(config.format);
    params.type = GetFormatType(params.pixel_format);
-    params.width = config.width;
+    if (params.is_tiled) {
+        params.width = config.width;
+    } else {
+        params.pitch = config.width;
+        const u32 bpp = params.GetFormatBpp() / 8;
+        params.width = params.pitch / bpp;
+    }
    params.height = config.height;
    params.unaligned_height = config.height;
    params.target = SurfaceTarget::Texture2D;
@@ -429,7 +437,8 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
    }
 }

-static void FastCopySurface(const Surface& src_surface, const Surface& dst_surface) {
+void RasterizerCacheOpenGL::FastCopySurface(const Surface& src_surface,
+                                            const Surface& dst_surface) {
    const auto& src_params{src_surface->GetSurfaceParams()};
    const auto& dst_params{dst_surface->GetSurfaceParams()};

@@ -439,12 +448,15 @@ static void FastCopySurface(const Surface& src_surface, const Surface& dst_surfa
    glCopyImageSubData(src_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, 0,
                       0, dst_surface->Texture().handle, SurfaceTargetToGL(dst_params.target), 0, 0,
                       0, 0, width, height, 1);
+
+    dst_surface->MarkAsModified(true, *this);
 }

 MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64));
-static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
-                        const GLuint copy_pbo_handle, const GLenum src_attachment = 0,
-                        const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0) {
+void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface,
+                                        const GLuint copy_pbo_handle, const GLenum src_attachment,
+                                        const GLenum dst_attachment,
+                                        const std::size_t cubemap_face) {
    MICROPROFILE_SCOPE(OpenGL_CopySurface);
    ASSERT_MSG(dst_attachment == 0, "Unimplemented");

@@ -524,60 +536,48 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
        }
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
    }
+
+    dst_surface->MarkAsModified(true, *this);
 }

 CachedSurface::CachedSurface(const SurfaceParams& params)
    : params(params), gl_target(SurfaceTargetToGL(params.target)),
      cached_size_in_bytes(params.size_in_bytes) {
-    texture.Create();
-    const auto& rect{params.GetRect()};
+    texture.Create(gl_target);

-    // Keep track of previous texture bindings
-    OpenGLState cur_state = OpenGLState::GetCurState();
-    const auto& old_tex = cur_state.texture_units[0];
-    SCOPE_EXIT({
-        cur_state.texture_units[0] = old_tex;
-        cur_state.Apply();
-    });
-
-    cur_state.texture_units[0].texture = texture.handle;
-    cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
-    cur_state.Apply();
-    glActiveTexture(GL_TEXTURE0);
+    // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
+    // alternatives. This signals a bug on those functions.
+    const auto width = static_cast<GLsizei>(params.MipWidth(0));
+    const auto height = static_cast<GLsizei>(params.MipHeight(0));

    const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
    gl_internal_format = format_tuple.internal_format;
-    gl_is_compressed = format_tuple.compressed;

-    if (!format_tuple.compressed) {
-        // Only pre-create the texture for non-compressed textures.
-        switch (params.target) {
-        case SurfaceTarget::Texture1D:
-            glTexStorage1D(SurfaceTargetToGL(params.target), params.max_mip_level,
-                           format_tuple.internal_format, rect.GetWidth());
-            break;
-        case SurfaceTarget::Texture2D:
-        case SurfaceTarget::TextureCubemap:
-            glTexStorage2D(SurfaceTargetToGL(params.target), params.max_mip_level,
-                           format_tuple.internal_format, rect.GetWidth(), rect.GetHeight());
-            break;
-        case SurfaceTarget::Texture3D:
-        case SurfaceTarget::Texture2DArray:
-        case SurfaceTarget::TextureCubeArray:
-            glTexStorage3D(SurfaceTargetToGL(params.target), params.max_mip_level,
-                           format_tuple.internal_format, rect.GetWidth(), rect.GetHeight(),
-                           params.depth);
-            break;
-        default:
-            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
-                         static_cast<u32>(params.target));
-            UNREACHABLE();
-            glTexStorage2D(GL_TEXTURE_2D, params.max_mip_level, format_tuple.internal_format,
-                           rect.GetWidth(), rect.GetHeight());
-        }
+    switch (params.target) {
+    case SurfaceTarget::Texture1D:
+        glTextureStorage1D(texture.handle, params.max_mip_level, format_tuple.internal_format,
+                           width);
+        break;
+    case SurfaceTarget::Texture2D:
+    case SurfaceTarget::TextureCubemap:
+        glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
+                           width, height);
+        break;
+    case SurfaceTarget::Texture3D:
+    case SurfaceTarget::Texture2DArray:
+    case SurfaceTarget::TextureCubeArray:
+        glTextureStorage3D(texture.handle, params.max_mip_level, format_tuple.internal_format,
+                           width, height, params.depth);
+        break;
+    default:
+        LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
+                     static_cast<u32>(params.target));
+        UNREACHABLE();
+        glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
+                           width, height);
    }

-    ApplyTextureDefaults(SurfaceTargetToGL(params.target), params.max_mip_level);
+    ApplyTextureDefaults(texture.handle, params.max_mip_level);

    OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, params.IdentityString());

@@ -703,9 +703,20 @@ void CachedSurface::LoadGLBuffer() {
        for (u32 i = 0; i < params.max_mip_level; i++)
            SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i);
    } else {
-        const auto texture_src_data{Memory::GetPointer(params.addr)};
-        const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl};
-        gl_buffer[0].assign(texture_src_data, texture_src_data_end);
+        const u32 bpp = params.GetFormatBpp() / 8;
+        const u32 copy_size = params.width * bpp;
+        if (params.pitch == copy_size) {
+            std::memcpy(gl_buffer[0].data(), Memory::GetPointer(params.addr),
+                        params.size_in_bytes_gl);
+        } else {
+            const u8* start = Memory::GetPointer(params.addr);
+            u8* write_to = gl_buffer[0].data();
+            for (u32 h = params.height; h > 0; h--) {
+                std::memcpy(write_to, start, copy_size);
+                start += params.pitch;
+                write_to += copy_size;
+            }
+        }
    }
    for (u32 i = 0; i < params.max_mip_level; i++) {
        ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i),
@@ -742,7 +753,19 @@ void CachedSurface::FlushGLBuffer() {

        SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0);
    } else {
-        std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes());
+        const u32 bpp = params.GetFormatBpp() / 8;
+        const u32 copy_size = params.width * bpp;
+        if (params.pitch == copy_size) {
+            std::memcpy(Memory::GetPointer(params.addr), gl_buffer[0].data(), GetSizeInBytes());
+        } else {
+            u8* start = Memory::GetPointer(params.addr);
+            const u8* read_to = gl_buffer[0].data();
+            for (u32 h = params.height; h > 0; h--) {
+                std::memcpy(start, read_to, copy_size);
+                start += params.pitch;
+                read_to += copy_size;
+            }
+        }
    }
 }

@@ -751,63 +774,50 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
    const auto& rect{params.GetRect(mip_map)};

    // Load data from memory to the surface
-    const GLint x0 = static_cast<GLint>(rect.left);
-    const GLint y0 = static_cast<GLint>(rect.bottom);
-    std::size_t buffer_offset =
+    const auto x0 = static_cast<GLint>(rect.left);
+    const auto y0 = static_cast<GLint>(rect.bottom);
+    auto buffer_offset =
        static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.MipWidth(mip_map) +
                                 static_cast<std::size_t>(x0)) *
        GetBytesPerPixel(params.pixel_format);

    const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
-    const GLuint target_tex = texture.handle;
-    OpenGLState cur_state = OpenGLState::GetCurState();
-
-    const auto& old_tex = cur_state.texture_units[0];
-    SCOPE_EXIT({
-        cur_state.texture_units[0] = old_tex;
-        cur_state.Apply();
-    });
-    cur_state.texture_units[0].texture = target_tex;
-    cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
-    cur_state.Apply();

    // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
    ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0);
    glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));

-    GLsizei image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
-    glActiveTexture(GL_TEXTURE0);
+    const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
    if (tuple.compressed) {
        switch (params.target) {
        case SurfaceTarget::Texture2D:
-            glCompressedTexImage2D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
-                                   static_cast<GLsizei>(params.MipWidth(mip_map)),
-                                   static_cast<GLsizei>(params.MipHeight(mip_map)), 0, image_size,
-                                   &gl_buffer[mip_map][buffer_offset]);
+            glCompressedTextureSubImage2D(
+                texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
+                static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format, image_size,
+                &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture3D:
-            glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
-                                   static_cast<GLsizei>(params.MipWidth(mip_map)),
-                                   static_cast<GLsizei>(params.MipHeight(mip_map)),
-                                   static_cast<GLsizei>(params.MipDepth(mip_map)), 0, image_size,
-                                   &gl_buffer[mip_map][buffer_offset]);
+            glCompressedTextureSubImage3D(
+                texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
+                static_cast<GLsizei>(params.MipHeight(mip_map)),
+                static_cast<GLsizei>(params.MipDepth(mip_map)), tuple.internal_format, image_size,
+                &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture2DArray:
        case SurfaceTarget::TextureCubeArray:
-            glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
-                                   static_cast<GLsizei>(params.MipWidth(mip_map)),
-                                   static_cast<GLsizei>(params.MipHeight(mip_map)),
-                                   static_cast<GLsizei>(params.depth), 0, image_size,
-                                   &gl_buffer[mip_map][buffer_offset]);
+            glCompressedTextureSubImage3D(
+                texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
+                static_cast<GLsizei>(params.MipHeight(mip_map)), static_cast<GLsizei>(params.depth),
+                tuple.internal_format, image_size, &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::TextureCubemap: {
-            GLsizei layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map));
+            const auto layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map));
            for (std::size_t face = 0; face < params.depth; ++face) {
-                glCompressedTexImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face),
-                                       mip_map, tuple.internal_format,
-                                       static_cast<GLsizei>(params.MipWidth(mip_map)),
-                                       static_cast<GLsizei>(params.MipHeight(mip_map)), 0,
-                                       layer_size, &gl_buffer[mip_map][buffer_offset]);
+                glCompressedTextureSubImage3D(
+                    texture.handle, mip_map, 0, 0, static_cast<GLint>(face),
+                    static_cast<GLsizei>(params.MipWidth(mip_map)),
+                    static_cast<GLsizei>(params.MipHeight(mip_map)), 1, tuple.internal_format,
+                    layer_size, &gl_buffer[mip_map][buffer_offset]);
                buffer_offset += layer_size;
            }
            break;
@@ -816,46 +826,43 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                         static_cast<u32>(params.target));
            UNREACHABLE();
-            glCompressedTexImage2D(GL_TEXTURE_2D, mip_map, tuple.internal_format,
-                                   static_cast<GLsizei>(params.MipWidth(mip_map)),
-                                   static_cast<GLsizei>(params.MipHeight(mip_map)), 0,
-                                   static_cast<GLsizei>(params.size_in_bytes_gl),
-                                   &gl_buffer[mip_map][buffer_offset]);
+            glCompressedTextureSubImage2D(
+                texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
+                static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format,
+                static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[mip_map][buffer_offset]);
        }
    } else {
-
        switch (params.target) {
        case SurfaceTarget::Texture1D:
-            glTexSubImage1D(SurfaceTargetToGL(params.target), mip_map, x0,
-                            static_cast<GLsizei>(rect.GetWidth()), tuple.format, tuple.type,
-                            &gl_buffer[mip_map][buffer_offset]);
+            glTextureSubImage1D(texture.handle, mip_map, x0, static_cast<GLsizei>(rect.GetWidth()),
+                                tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture2D:
-            glTexSubImage2D(SurfaceTargetToGL(params.target), mip_map, x0, y0,
-                            static_cast<GLsizei>(rect.GetWidth()),
-                            static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
-                            &gl_buffer[mip_map][buffer_offset]);
+            glTextureSubImage2D(texture.handle, mip_map, x0, y0,
+                                static_cast<GLsizei>(rect.GetWidth()),
+                                static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
+                                &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture3D:
-            glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0,
-                            static_cast<GLsizei>(rect.GetWidth()),
-                            static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map),
-                            tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
+            glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
+                                static_cast<GLsizei>(rect.GetWidth()),
+                                static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map),
+                                tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture2DArray:
        case SurfaceTarget::TextureCubeArray:
-            glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0,
-                            static_cast<GLsizei>(rect.GetWidth()),
-                            static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
-                            tuple.type, &gl_buffer[mip_map][buffer_offset]);
+            glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
+                                static_cast<GLsizei>(rect.GetWidth()),
+                                static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
+                                tuple.type, &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::TextureCubemap: {
            std::size_t start = buffer_offset;
            for (std::size_t face = 0; face < params.depth; ++face) {
-                glTexSubImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), mip_map,
-                                x0, y0, static_cast<GLsizei>(rect.GetWidth()),
-                                static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
-                                &gl_buffer[mip_map][buffer_offset]);
+                glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face),
+                                    static_cast<GLsizei>(rect.GetWidth()),
+                                    static_cast<GLsizei>(rect.GetHeight()), 1, tuple.format,
+                                    tuple.type, &gl_buffer[mip_map][buffer_offset]);
                buffer_offset += params.LayerSizeGL(mip_map);
            }
            break;
@@ -864,9 +871,10 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                         static_cast<u32>(params.target));
            UNREACHABLE();
-            glTexSubImage2D(GL_TEXTURE_2D, mip_map, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
-                            static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
-                            &gl_buffer[mip_map][buffer_offset]);
+            glTextureSubImage2D(texture.handle, mip_map, x0, y0,
+                                static_cast<GLsizei>(rect.GetWidth()),
+                                static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
+                                &gl_buffer[mip_map][buffer_offset]);
        }
    }

@@ -876,29 +884,18 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
 void CachedSurface::EnsureTextureView() {
    if (texture_view.handle != 0)
        return;
-    // Compressed texture are not being created with immutable storage
-    UNIMPLEMENTED_IF(gl_is_compressed);

    const GLenum target{TargetLayer()};
    const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u};
    constexpr GLuint min_layer = 0;
    constexpr GLuint min_level = 0;

-    texture_view.Create();
+    glGenTextures(1, &texture_view.handle); // glTextureView needs a name with no target yet
    glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, min_level,
                  params.max_mip_level, min_layer, num_layers);
-
-    OpenGLState cur_state = OpenGLState::GetCurState();
-    const auto& old_tex = cur_state.texture_units[0];
-    SCOPE_EXIT({
-        cur_state.texture_units[0] = old_tex;
-        cur_state.Apply();
-    });
-    cur_state.texture_units[0].texture = texture_view.handle;
-    cur_state.texture_units[0].target = target;
-    cur_state.Apply();
-
-    ApplyTextureDefaults(target, params.max_mip_level);
+    ApplyTextureDefaults(texture_view.handle, params.max_mip_level);
+    glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA,
+                         reinterpret_cast<const GLint*>(swizzle.data())); // reuse cached swizzle
 }

 MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
@@ -909,6 +906,25 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
        UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle);
 }

+void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
+                                  Tegra::Texture::SwizzleSource swizzle_y,
+                                  Tegra::Texture::SwizzleSource swizzle_z,
+                                  Tegra::Texture::SwizzleSource swizzle_w) {
+    const GLenum new_x = MaxwellToGL::SwizzleSource(swizzle_x); // per-channel GL swizzle enums
+    const GLenum new_y = MaxwellToGL::SwizzleSource(swizzle_y);
+    const GLenum new_z = MaxwellToGL::SwizzleSource(swizzle_z);
+    const GLenum new_w = MaxwellToGL::SwizzleSource(swizzle_w);
+    if (swizzle[0] == new_x && swizzle[1] == new_y && swizzle[2] == new_z && swizzle[3] == new_w) {
+        return; // cached swizzle already matches: skip the GL calls
+    }
+    swizzle = {new_x, new_y, new_z, new_w}; // remember what was last applied
+    const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data());
+    glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
+    if (texture_view.handle != 0) { // the view is created lazily, so it may not exist yet
+        glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
+    }
+}
+
 RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer)
    : RasterizerCache{rasterizer} {
    read_framebuffer.Create();
@@ -1041,26 +1057,161 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
        }
        address += layer_size;
    }
+
+    dst_surface->MarkAsModified(true, *this);
+}
+
+static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
+                        const MathUtil::Rectangle<u32>& src_rect,
+                        const MathUtil::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
+                        GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0,
+                        std::size_t cubemap_face = 0) {
+    // Blits src_rect of src_surface onto dst_rect of dst_surface through the two given framebuffers
+    const auto& src_params{src_surface->GetSurfaceParams()};
+    const auto& dst_params{dst_surface->GetSurfaceParams()};
+
+    OpenGLState prev_state{OpenGLState::GetCurState()};
+    SCOPE_EXIT({ prev_state.Apply(); }); // restore the caller's GL state when leaving the function
+
+    OpenGLState state;
+    state.draw.read_framebuffer = read_fb_handle;
+    state.draw.draw_framebuffer = draw_fb_handle;
+    state.Apply();
+
+    u32 buffers{}; // blit mask; stays 0 if the source type matches none of the branches below
+
+    if (src_params.type == SurfaceType::ColorTexture) { // color: attach texture, detach depth-stencil
+        switch (src_params.target) {
+        case SurfaceTarget::Texture2D:
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                                   GL_TEXTURE_2D, src_surface->Texture().handle, 0);
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+            break;
+        case SurfaceTarget::TextureCubemap:
+            glFramebufferTexture2D(
+                GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
+                src_surface->Texture().handle, 0);
+            glFramebufferTexture2D(
+                GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
+            break;
+        case SurfaceTarget::Texture2DArray:
+            glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                                      src_surface->Texture().handle, 0, 0);
+            glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
+            break;
+        case SurfaceTarget::Texture3D:
+            glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                                   SurfaceTargetToGL(src_params.target),
+                                   src_surface->Texture().handle, 0, 0);
+            glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+                                   SurfaceTargetToGL(src_params.target), 0, 0, 0);
+            break;
+        default:
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                                   GL_TEXTURE_2D, src_surface->Texture().handle, 0);
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+            break;
+        }
+
+        switch (dst_params.target) {
+        case SurfaceTarget::Texture2D:
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                                   GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+            break;
+        case SurfaceTarget::TextureCubemap:
+            glFramebufferTexture2D(
+                GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
+                dst_surface->Texture().handle, 0);
+            glFramebufferTexture2D(
+                GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
+            break;
+        case SurfaceTarget::Texture2DArray:
+            glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                                      dst_surface->Texture().handle, 0, 0);
+            glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
+            break;
+
+        case SurfaceTarget::Texture3D:
+            glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                                   SurfaceTargetToGL(dst_params.target),
+                                   dst_surface->Texture().handle, 0, 0);
+            glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+                                   SurfaceTargetToGL(dst_params.target), 0, 0, 0);
+            break;
+        default:
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                                   GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+            break;
+        }
+
+        buffers = GL_COLOR_BUFFER_BIT;
+    } else if (src_params.type == SurfaceType::Depth) { // depth: detach color and stencil
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                               GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+                               src_surface->Texture().handle, 0);
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                               GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+                               dst_surface->Texture().handle, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+
+        buffers = GL_DEPTH_BUFFER_BIT;
+    } else if (src_params.type == SurfaceType::DepthStencil) { // combined: detach color only
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
+                               GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                               src_surface->Texture().handle, 0);
+
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
+                               GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                               dst_surface->Texture().handle, 0);
+
+        buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
+    }
+    // Color blits are filtered linearly; depth/stencil blits are only valid with GL_NEAREST
+    glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
+                      dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
+                      buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
+
+    return true; // NOTE(review): there is no failure path; the result is always true
 }

 void RasterizerCacheOpenGL::FermiCopySurface(
    const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
-    const Tegra::Engines::Fermi2D::Regs::Surface& dst_config) {
+    const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
+    const MathUtil::Rectangle<u32>& src_rect, const MathUtil::Rectangle<u32>& dst_rect) {

    const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
    const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);

-    ASSERT(src_params.width == dst_params.width);
-    ASSERT(src_params.height == dst_params.height);
    ASSERT(src_params.pixel_format == dst_params.pixel_format);
    ASSERT(src_params.block_height == dst_params.block_height);
    ASSERT(src_params.is_tiled == dst_params.is_tiled);
    ASSERT(src_params.depth == dst_params.depth);
-    ASSERT(src_params.depth == 1); // Currently, FastCopySurface only works with 2D surfaces
    ASSERT(src_params.target == dst_params.target);
    ASSERT(src_params.rt.index == dst_params.rt.index);

-    FastCopySurface(GetSurface(src_params, true), GetSurface(dst_params, false));
+    auto src_surface = GetSurface(src_params, true);
+    auto dst_surface = GetSurface(dst_params, true); // NOTE(review): was false before; confirm intent
+    // Copy src_rect onto dst_rect with a framebuffer blit through the cache's read/draw framebuffers
+    BlitSurface(src_surface, dst_surface, src_rect, dst_rect, read_framebuffer.handle,
+                draw_framebuffer.handle);
+
+    dst_surface->MarkAsModified(true, *this);
 }

 void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -8,6 +8,7 @@
 #include <map>
 #include <memory>
 #include <string>
+#include <unordered_set>
 #include <vector>

 #include "common/alignment.h"
@@ -272,6 +273,7 @@ struct SurfaceParams {
    u32 height;
    u32 depth;
    u32 unaligned_height;
+    u32 pitch;
    SurfaceTarget target;
    SurfaceClass identity;
    u32 max_mip_level;
@@ -382,6 +384,11 @@ public:
    // Upload data in gl_buffer to this surface's texture
    void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);

+    void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
+                       Tegra::Texture::SwizzleSource swizzle_y,
+                       Tegra::Texture::SwizzleSource swizzle_z,
+                       Tegra::Texture::SwizzleSource swizzle_w);
+
 private:
    void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);

@@ -393,8 +400,8 @@ private:
    SurfaceParams params{};
    GLenum gl_target{};
    GLenum gl_internal_format{};
-    bool gl_is_compressed{};
    std::size_t cached_size_in_bytes{};
+    std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
 };

 class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -416,7 +423,9 @@ public:

    /// Copies the contents of one surface to another
    void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
-                          const Tegra::Engines::Fermi2D::Regs::Surface& dst_config);
+                          const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
+                          const MathUtil::Rectangle<u32>& src_rect,
+                          const MathUtil::Rectangle<u32>& dst_rect);

 private:
    void LoadSurface(const Surface& surface);
@@ -437,6 +446,10 @@ private:
    /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
    void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
    void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
+    void FastCopySurface(const Surface& src_surface, const Surface& dst_surface);
+    void CopySurface(const Surface& src_surface, const Surface& dst_surface,
+                     const GLuint copy_pbo_handle, const GLenum src_attachment = 0,
+                     const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0);

    /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
    /// previously been used. This is to prevent surfaces from being constantly created and
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -15,12 +15,12 @@ MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_R

 namespace OpenGL {

-void OGLTexture::Create() {
+void OGLTexture::Create(GLenum target) {
    if (handle != 0)
        return;

    MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
-    glGenTextures(1, &handle);
+    glCreateTextures(target, 1, &handle); // DSA: the object is created right away with this target
 }

 void OGLTexture::Release() {
@@ -71,7 +71,8 @@ void OGLShader::Release() {
 }

 void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader,
-                                  const char* frag_shader, bool separable_program) {
+                                  const char* frag_shader, bool separable_program,
+                                  bool hint_retrievable) {
    OGLShader vert, geo, frag;
    if (vert_shader)
        vert.Create(vert_shader, GL_VERTEX_SHADER);
@@ -81,7 +82,7 @@ void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shade
        frag.Create(frag_shader, GL_FRAGMENT_SHADER);

    MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
-    Create(separable_program, vert.handle, geo.handle, frag.handle);
+    Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle);
 }

 void OGLProgram::Release() {
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -28,7 +28,7 @@ public:
    }

    /// Creates a new internal OpenGL resource and stores the handle
-    void Create();
+    void Create(GLenum target);

    /// Deletes the internal OpenGL resource
    void Release();
@@ -101,15 +101,15 @@ public:
    }

    template <typename... T>
-    void Create(bool separable_program, T... shaders) {
+    void Create(bool separable_program, bool hint_retrievable, T... shaders) {
        if (handle != 0)
            return;
-        handle = GLShader::LoadProgram(separable_program, shaders...);
+        handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...);
    }

    /// Creates a new internal OpenGL resource and stores the handle
    void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
-                          bool separable_program = false);
+                          bool separable_program = false, bool hint_retrievable = false);

    /// Deletes the internal OpenGL resource
    void Release();
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -11,6 +11,7 @@
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
+#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/utils.h"
 #include "video_core/shader/shader_ir.h"
@@ -19,16 +20,29 @@ namespace OpenGL {

 using VideoCommon::Shader::ProgramCode;

+// One UBO is always reserved for emulation values
+constexpr u32 RESERVED_UBOS = 1;
+
+struct UnspecializedShader {
+    std::string code; // NOTE(review): presumably generated shader source without bindings; confirm
+    GLShader::ShaderEntries entries;
+    Maxwell::ShaderProgram program_type;
+};
+
+namespace {
+
 /// Gets the address for the specified shader stage program
-static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
+VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
    const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)];
-    return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
-                                               shader_config.offset);
+    const auto address = gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
+                                                            shader_config.offset);
+    ASSERT_MSG(address, "Invalid GPU address"); // check the translation before dereferencing it
+    return *address;
 }

 /// Gets the shader program code from memory for the specified address
-static ProgramCode GetShaderCode(VAddr addr) {
+ProgramCode GetShaderCode(VAddr addr) {
    ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
    Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64));
    return program_code;
@@ -49,38 +63,196 @@ constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) {
    }
 }

-CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
-    : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} {
+/// Returns whether the instruction at offset (counted from main_offset) is a scheduler instruction
+constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
+    // Sched instructions appear once every 4 instructions.
+    constexpr std::size_t SchedPeriod = 4;
+    const std::size_t absolute_offset = offset - main_offset; // unsigned: expects offset >= main_offset
+    return (absolute_offset % SchedPeriod) == 0;
+}

-    GLShader::ProgramResult program_result;
+/// Maps a GL primitive mode to {GLSL input layout name, debug name, max input vertices}
+constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
+    switch (primitive_mode) {
+    case GL_POINTS:
+        return {"points", "Points", 1};
+    case GL_LINES:
+    case GL_LINE_STRIP:
+        return {"lines", "Lines", 2};
+    case GL_LINES_ADJACENCY:
+    case GL_LINE_STRIP_ADJACENCY:
+        return {"lines_adjacency", "LinesAdj", 4};
+    case GL_TRIANGLES:
+    case GL_TRIANGLE_STRIP:
+    case GL_TRIANGLE_FAN:
+        return {"triangles", "Triangles", 3};
+    case GL_TRIANGLES_ADJACENCY:
+    case GL_TRIANGLE_STRIP_ADJACENCY:
+        return {"triangles_adjacency", "TrianglesAdj", 6};
+    default:
+        return {"points", "Invalid", 1}; // unknown mode: fall back to a points layout
+    }
+}

-    switch (program_type) {
-    case Maxwell::ShaderProgram::VertexA:
+/// Calculates the size in bytes of a program stream by scanning for its terminating instruction
+std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
+    constexpr std::size_t start_offset = 10; // NOTE(review): presumably skips a header; confirm
+    std::size_t offset = start_offset;
+    std::size_t size = start_offset * sizeof(u64); // the skipped words still count towards the size
+    while (offset < program.size()) {
+        const u64 instruction = program[offset];
+        if (!IsSchedInstruction(offset, start_offset)) { // sched words are never treated as the end
+            if (instruction == 0 || (instruction >> 52) == 0x50b) {
+                // End on Maxwell's "nop" instruction
+                break;
+            }
+        }
+        size += sizeof(u64);
+        offset++;
+    }
+    // The last instruction is included in the program size
+    return std::min(size + sizeof(u64), program.size() * sizeof(u64)); // clamped to the buffer size
+}
+
+/// Hashes one program stream, or both streams combined when program_type is VertexA
+u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code,
+                        const ProgramCode& code_b) {
+    u64 unique_identifier =
+        Common::CityHash64(reinterpret_cast<const char*>(code.data()), CalculateProgramSize(code));
+    if (program_type != Maxwell::ShaderProgram::VertexA) {
+        return unique_identifier; // single-stream stages: code_b is not read at all
+    }
+    // VertexA programs include two programs
+
+    std::size_t seed = 0;
+    boost::hash_combine(seed, unique_identifier);
+
+    const u64 identifier_b = Common::CityHash64(reinterpret_cast<const char*>(code_b.data()),
+                                                CalculateProgramSize(code_b));
+    boost::hash_combine(seed, identifier_b);
+    return static_cast<u64>(seed);
+}
+
+/// Generates the unspecialized shader result for a stage from its code stream(s)
+GLShader::ProgramResult CreateProgram(Maxwell::ShaderProgram program_type, ProgramCode program_code,
+                                      ProgramCode program_code_b) {
+    GLShader::ShaderSetup setup(program_code);
+    if (program_type == Maxwell::ShaderProgram::VertexA) {
        // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
        // Conventional HW does not support this, so we combine VertexA and VertexB into one
        // stage here.
-        setup.SetProgramB(GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
+        setup.SetProgramB(program_code_b);
+    }
+    setup.program.unique_identifier =
+        GetUniqueIdentifier(program_type, program_code, program_code_b);
+
+    switch (program_type) {
+    case Maxwell::ShaderProgram::VertexA:
    case Maxwell::ShaderProgram::VertexB:
-        CalculateProperties();
-        program_result = GLShader::GenerateVertexShader(setup);
-        break;
+        return GLShader::GenerateVertexShader(setup);
    case Maxwell::ShaderProgram::Geometry:
-        CalculateProperties();
-        program_result = GLShader::GenerateGeometryShader(setup);
-        break;
+        return GLShader::GenerateGeometryShader(setup);
    case Maxwell::ShaderProgram::Fragment:
-        CalculateProperties();
-        program_result = GLShader::GenerateFragmentShader(setup);
-        break;
+        return GLShader::GenerateFragmentShader(setup);
    default:
        LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type));
        UNREACHABLE();
+        return {}; // empty result; the CachedShader constructor bails out on an empty code string
+    }
+}
+
+CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
+                               Maxwell::ShaderProgram program_type, BaseBindings base_bindings,
+                               GLenum primitive_mode, bool hint_retrievable = false) {
+    std::string source = "#version 430 core\n"; // preamble of binding #defines, then the given code
+    source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
+
+    for (const auto& cbuf : entries.const_buffers) {
+        source +=
+            fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++);
+    }
+    for (const auto& gmem : entries.global_memory_entries) {
+        source += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(),
+                              gmem.GetCbufOffset(), base_bindings.gmem++);
+    }
+    for (const auto& sampler : entries.samplers) {
+        source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
+                              base_bindings.sampler++);
+    }
+    // Geometry shaders also get an input layout and vertex count derived from primitive_mode
+    if (program_type == Maxwell::ShaderProgram::Geometry) {
+        const auto [glsl_topology, debug_name, max_vertices] =
+            GetPrimitiveDescription(primitive_mode);
+
+        source += "layout (" + std::string(glsl_topology) + ") in;\n";
+        source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
+    }
+
+    source += code;
+    // Compile the single stage and wrap it in a separable program object
+    OGLShader shader;
+    shader.Create(source.c_str(), GetShaderType(program_type));
+
+    auto program = std::make_shared<OGLProgram>();
+    program->Create(true, hint_retrievable, shader.handle);
+    return program;
+}
+
+std::set<GLenum> GetSupportedFormats() {
+    std::set<GLenum> supported_formats;
+    // Collect every program binary format the driver reports: query the count, then the list
+    GLint num_formats{};
+    glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
+
+    std::vector<GLint> formats(num_formats);
+    glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
+
+    for (const GLint format : formats)
+        supported_formats.insert(static_cast<GLenum>(format));
+    return supported_formats;
+}
+
+} // namespace
+
+CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
+                           ShaderDiskCacheOpenGL& disk_cache,
+                           const PrecompiledPrograms& precompiled_programs,
+                           ProgramCode&& program_code, ProgramCode&& program_code_b)
+    : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type},
+      disk_cache{disk_cache}, precompiled_programs{precompiled_programs} {
+    // Measure both streams up front: the code vectors are moved into the disk cache entry below
+    const std::size_t code_size = CalculateProgramSize(program_code);
+    const std::size_t code_size_b =
+        program_code_b.empty() ? 0 : CalculateProgramSize(program_code_b);
+
+    GLShader::ProgramResult program_result =
+        CreateProgram(program_type, program_code, program_code_b);
+    if (program_result.first.empty()) {
+        // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
        return;
    }

    code = program_result.first;
    entries = program_result.second;
    shader_length = entries.shader_length;
+    // Save the raw guest code to the disk cache; the sizes are passed as counts of u64 words
+    const ShaderDiskCacheRaw raw(unique_identifier, program_type,
+                                 static_cast<u32>(code_size / sizeof(u64)),
+                                 static_cast<u32>(code_size_b / sizeof(u64)),
+                                 std::move(program_code), std::move(program_code_b));
+    disk_cache.SaveRaw(raw);
+}
+
+CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
+                           ShaderDiskCacheOpenGL& disk_cache,
+                           const PrecompiledPrograms& precompiled_programs,
+                           GLShader::ProgramResult result)
+    : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type},
+      disk_cache{disk_cache}, precompiled_programs{precompiled_programs} {
+    // Builds the shader from an already generated result; result is owned here, so move both halves
+    code = std::move(result.first);
+    entries = std::move(result.second);
+    shader_length = entries.shader_length;
 }

 std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode,
@@ -92,136 +264,222 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
        const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings);
        auto& program = entry->second;
        if (is_cache_miss) {
-            std::string source = AllocateBindings(base_bindings);
-            source += code;
+            program = TryLoadProgram(primitive_mode, base_bindings);
+            if (!program) {
+                program =
+                    SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
+                disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
+            }

-            OGLShader shader;
-            shader.Create(source.c_str(), GetShaderType(program_type));
-            program.Create(true, shader.handle);
-            LabelGLObject(GL_PROGRAM, program.handle, addr);
+            LabelGLObject(GL_PROGRAM, program->handle, addr);
        }

-        handle = program.handle;
+        handle = program->handle;
    }

-    // Add const buffer and samplers offset reserved by this shader. One UBO binding is reserved for
-    // emulation values
-    base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + 1;
+    base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS;
    base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
    base_bindings.sampler += static_cast<u32>(entries.samplers.size());

    return {handle, base_bindings};
 }

-std::string CachedShader::AllocateBindings(BaseBindings base_bindings) {
-    std::string code = "#version 430 core\n";
-    code += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
-
-    for (const auto& cbuf : entries.const_buffers) {
-        code += fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++);
-    }
-
-    for (const auto& gmem : entries.global_memory_entries) {
-        code += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(),
-                            gmem.GetCbufOffset(), base_bindings.gmem++);
-    }
-
-    for (const auto& sampler : entries.samplers) {
-        code += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
-                            base_bindings.sampler++);
-    }
-
-    return code;
-}
-
+/// Returns the geometry program for the topology class that primitive_mode belongs to, using the
+/// set of geometry programs stored for base_bindings (created empty on first use).
 GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) {
    const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings);
    auto& programs = entry->second;

    switch (primitive_mode) {
    case GL_POINTS:
-        return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints");
+        return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
    case GL_LINES:
    case GL_LINE_STRIP:
-        return LazyGeometryProgram(programs.lines, base_bindings, "lines", 2, "ShaderLines");
+        return LazyGeometryProgram(programs.lines, base_bindings, primitive_mode);
    case GL_LINES_ADJACENCY:
    case GL_LINE_STRIP_ADJACENCY:
-        return LazyGeometryProgram(programs.lines_adjacency, base_bindings, "lines_adjacency", 4,
-                                   "ShaderLinesAdjacency");
+        return LazyGeometryProgram(programs.lines_adjacency, base_bindings, primitive_mode);
    case GL_TRIANGLES:
    case GL_TRIANGLE_STRIP:
    case GL_TRIANGLE_FAN:
-        return LazyGeometryProgram(programs.triangles, base_bindings, "triangles", 3,
-                                   "ShaderTriangles");
+        return LazyGeometryProgram(programs.triangles, base_bindings, primitive_mode);
    case GL_TRIANGLES_ADJACENCY:
    case GL_TRIANGLE_STRIP_ADJACENCY:
-        return LazyGeometryProgram(programs.triangles_adjacency, base_bindings,
-                                   "triangles_adjacency", 6, "ShaderTrianglesAdjacency");
+        return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, primitive_mode);
    default:
        UNREACHABLE_MSG("Unknown primitive mode.");
-        return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints");
+        // Unknown modes fall back to the points program slot
+        return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
    }
 }

-GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings,
-                                         const std::string& glsl_topology, u32 max_vertices,
-                                         const std::string& debug_name) {
-    if (target_program.handle != 0) {
-        return target_program.handle;
+/// Returns the handle stored in target_program, filling the slot first when it is empty: a
+/// precompiled program is preferred, otherwise the shader is specialized and the new usage is
+/// recorded in the disk cache.
+GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
+                                         GLenum primitive_mode) {
+    if (target_program) {
+        return target_program->handle;
+    }
+    // Only debug_name is used below; the other two bindings are not read in this function
+    const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(primitive_mode);
+    target_program = TryLoadProgram(primitive_mode, base_bindings);
+    if (!target_program) {
+        target_program =
+            SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
+        disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
    }
-    std::string source = AllocateBindings(base_bindings);
-    source += "layout (" + glsl_topology + ") in;\n";
-    source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
-    source += code;

-    OGLShader shader;
-    shader.Create(source.c_str(), GL_GEOMETRY_SHADER);
-    target_program.Create(true, shader.handle);
-    LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name);
-    return target_program.handle;
+    LabelGLObject(GL_PROGRAM, target_program->handle, addr, debug_name);
+
+    return target_program->handle;
 };

-static bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
-    // sched instructions appear once every 4 instructions.
-    static constexpr std::size_t SchedPeriod = 4;
-    const std::size_t absolute_offset = offset - main_offset;
-    return (absolute_offset % SchedPeriod) == 0;
+/// Looks up the program precompiled for this shader under the given primitive mode and bindings.
+/// Returns an empty CachedProgram when no such program exists.
+CachedProgram CachedShader::TryLoadProgram(GLenum primitive_mode,
+                                           BaseBindings base_bindings) const {
+    const ShaderDiskCacheUsage usage = GetUsage(primitive_mode, base_bindings);
+    if (const auto it = precompiled_programs.find(usage); it != precompiled_programs.end()) {
+        return it->second;
+    }
+    return {};
 }

-static std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
-    constexpr std::size_t start_offset = 10;
-    std::size_t offset = start_offset;
-    std::size_t size = start_offset * sizeof(u64);
-    while (offset < program.size()) {
-        const u64 inst = program[offset];
-        if (!IsSchedInstruction(offset, start_offset)) {
-            if (inst == 0 || (inst >> 52) == 0x50b) {
-                break;
+/// Describes one specialization of this shader: its unique identifier combined with the given
+/// bindings and primitive mode.
+ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
+                                            BaseBindings base_bindings) const {
+    ShaderDiskCacheUsage usage{unique_identifier, base_bindings, primitive_mode};
+    return usage;
+}
+
+/// Constructs the shader cache on top of the rasterizer cache base; the disk cache member is
+/// constructed from the given system instance.
+ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system)
+    : RasterizerCache{rasterizer}, disk_cache{system} {}
+
+/// Loads the shader disk cache: decompiles the stored raw shaders, builds a program for every
+/// recorded usage (from a dumped binary when it is accepted, otherwise by specializing the
+/// shader) and finally dumps the programs that had no stored binary.
+/// stop_loading is polled between steps and aborts the load as soon as it becomes true.
+/// callback is optional and receives the current stage, the progress and the total.
+void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
+                                      const VideoCore::DiskResourceLoadCallback& callback) {
+    const auto transferable = disk_cache.LoadTransferable();
+    if (!transferable) {
+        return;
+    }
+    // Bind by reference: transferable outlives every use of raws and usages
+    const auto& [raws, usages] = *transferable;
+
+    auto [decompiled, dumps] = disk_cache.LoadPrecompiled();
+
+    const auto supported_formats{GetSupportedFormats()};
+    const auto unspecialized{
+        GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)};
+    if (stop_loading)
+        return;
+    if (unspecialized.empty() && !raws.empty()) {
+        // The transferable cache was rejected (and invalidated); there is nothing to build from
+        return;
+    }
+
+    // Build shaders
+    if (callback)
+        callback(VideoCore::LoadCallbackStage::Build, 0, usages.size());
+    for (std::size_t i = 0; i < usages.size(); ++i) {
+        if (stop_loading)
+            return;
+
+        const auto& usage{usages[i]};
+        const auto unspec_it = unspecialized.find(usage.unique_identifier);
+        if (unspec_it == unspecialized.end()) {
+            // A usage without its raw shader cannot be built; skip it instead of throwing
+            LOG_ERROR(Render_OpenGL, "Usage of shader {:016x} has no raw entry - skipping",
+                      usage.unique_identifier);
+            if (callback)
+                callback(VideoCore::LoadCallbackStage::Build, i + 1, usages.size());
+            continue;
+        }
+        const auto& unspec{unspec_it->second};
+
+        LOG_INFO(Render_OpenGL, "Building shader {:016x} ({} of {})", usage.unique_identifier,
+                 i + 1, usages.size());
+
+        const auto dump_it = dumps.find(usage);
+
+        CachedProgram shader;
+        if (dump_it != dumps.end()) {
+            // If the shader is dumped, attempt to load it from the stored binary
+            shader = GeneratePrecompiledProgram(dump_it->second, supported_formats);
+            if (!shader) {
+                // Invalidate the precompiled cache if a dumped shader was rejected
+                disk_cache.InvalidatePrecompiled();
+                dumps.clear();
            }
        }
-        size += sizeof(inst);
-        offset++;
-    }
-    return size;
-}
+        if (!shader) {
+            shader = SpecializeShader(unspec.code, unspec.entries, unspec.program_type,
+                                      usage.bindings, usage.primitive, true);
+        }
+        precompiled_programs.insert({usage, std::move(shader)});

-void CachedShader::CalculateProperties() {
-    setup.program.real_size = CalculateProgramSize(setup.program.code);
-    setup.program.real_size_b = 0;
-    setup.program.unique_identifier = Common::CityHash64(
-        reinterpret_cast<const char*>(setup.program.code.data()), setup.program.real_size);
-    if (program_type == Maxwell::ShaderProgram::VertexA) {
-        std::size_t seed = 0;
-        boost::hash_combine(seed, setup.program.unique_identifier);
-        setup.program.real_size_b = CalculateProgramSize(setup.program.code_b);
-        const u64 identifier_b = Common::CityHash64(
-            reinterpret_cast<const char*>(setup.program.code_b.data()), setup.program.real_size_b);
-        boost::hash_combine(seed, identifier_b);
-        setup.program.unique_identifier = static_cast<u64>(seed);
+        if (callback)
+            callback(VideoCore::LoadCallbackStage::Build, i + 1, usages.size());
+    }
+
+    // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before
+    // precompiling them
+
+    for (std::size_t i = 0; i < usages.size(); ++i) {
+        const auto& usage{usages[i]};
+        if (dumps.find(usage) != dumps.end()) {
+            continue;
+        }
+        // Usages skipped above have no program; only dump the ones that were actually built
+        const auto program_it = precompiled_programs.find(usage);
+        if (program_it != precompiled_programs.end() && program_it->second) {
+            disk_cache.SaveDump(usage, program_it->second->handle);
+        }
    }
 }

-ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer) : RasterizerCache{rasterizer} {}
+/// Recreates an OpenGL program from a dumped binary. Returns an empty CachedProgram when the
+/// binary format is not in supported_formats or when the loaded program reports a failed link.
+CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
+    const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats) {
+
+    const bool is_format_supported = supported_formats.count(dump.binary_format) != 0;
+    if (!is_format_supported) {
+        LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
+        return {};
+    }
+
+    auto program = std::make_shared<OGLProgram>();
+    program->handle = glCreateProgram();
+    const GLuint handle = program->handle;
+    glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
+    const auto binary_size = static_cast<GLsizei>(dump.binary.size());
+    glProgramBinary(handle, dump.binary_format, dump.binary.data(), binary_size);
+
+    // The link status tells whether the binary was accepted
+    GLint link_status = GL_FALSE;
+    glGetProgramiv(handle, GL_LINK_STATUS, &link_status);
+    if (link_status != GL_FALSE) {
+        return program;
+    }
+    LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing");
+    return {};
+}
+
+/// Produces the decompiled (unspecialized) form of every raw shader, keyed by unique identifier.
+/// The hash of each raw entry is verified first; a mismatch invalidates the transferable cache
+/// and yields an empty map. A decompiled result stored in the precompiled file is reused,
+/// otherwise the shader is decompiled now and the result is saved. An empty map is also returned
+/// when stop_loading becomes true.
+std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecializedShaders(
+    const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
+    const std::vector<ShaderDiskCacheRaw>& raws,
+    const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) {
+    std::unordered_map<u64, UnspecializedShader> unspecialized;
+
+    if (callback)
+        callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
+
+    for (std::size_t i = 0; i < raws.size(); ++i) {
+        if (stop_loading)
+            return {};
+
+        const auto& raw{raws[i]};
+        const u64 unique_identifier = raw.GetUniqueIdentifier();
+        const u64 calculated_hash =
+            GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB());
+        if (unique_identifier != calculated_hash) {
+            LOG_ERROR(
+                Render_OpenGL,
+                "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing shader cache",
+                unique_identifier, calculated_hash);
+            disk_cache.InvalidateTransferable();
+            return {};
+        }
+
+        GLShader::ProgramResult result;
+        if (const auto it = decompiled.find(unique_identifier); it != decompiled.end()) {
+            // If it's stored in the precompiled file, avoid decompiling it here
+            const auto& stored_decompiled{it->second};
+            result = {stored_decompiled.code, stored_decompiled.entries};
+        } else {
+            // Otherwise decompile the shader at boot and save the result to the decompiled file
+            result =
+                CreateProgram(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB());
+            disk_cache.SaveDecompiled(unique_identifier, result.first, result.second);
+        }
+
+        // Keep a copy in precompiled_shaders; the original is moved into the map below
+        precompiled_shaders.insert({unique_identifier, result});
+
+        unspecialized.insert(
+            {unique_identifier,
+             {std::move(result.first), std::move(result.second), raw.GetProgramType()}});
+
+        // Report i + 1: this entry is finished, so the final report reaches raws.size()
+        if (callback)
+            callback(VideoCore::LoadCallbackStage::Decompile, i + 1, raws.size());
+    }
+    return unspecialized;
+}

 Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
    if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) {
@@ -235,7 +493,23 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {

    if (!shader) {
        // No shader found - create a new one
-        shader = std::make_shared<CachedShader>(program_addr, program);
+        ProgramCode program_code = GetShaderCode(program_addr);
+        ProgramCode program_code_b;
+        if (program == Maxwell::ShaderProgram::VertexA) {
+            program_code_b = GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB));
+        }
+        const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
+
+        const auto found = precompiled_shaders.find(unique_identifier);
+        if (found != precompiled_shaders.end()) {
+            shader =
+                std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache,
+                                               precompiled_programs, found->second);
+        } else {
+            shader = std::make_shared<CachedShader>(
+                program_addr, unique_identifier, program, disk_cache, precompiled_programs,
+                std::move(program_code), std::move(program_code_b));
+        }
        Register(shader);
    }

--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -5,40 +5,49 @@
 #pragma once

 #include <array>
-#include <map>
 #include <memory>
+#include <set>
 #include <tuple>
+#include <unordered_map>

 #include <glad/glad.h>

 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/rasterizer_cache.h"
+#include "video_core/renderer_base.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
+#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
 #include "video_core/renderer_opengl/gl_shader_gen.h"

+namespace Core {
+class System;
+} // namespace Core
+
 namespace OpenGL {

 class CachedShader;
 class RasterizerOpenGL;
+struct UnspecializedShader;

 using Shader = std::shared_ptr<CachedShader>;
+using CachedProgram = std::shared_ptr<OGLProgram>;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-
-struct BaseBindings {
-    u32 cbuf{};
-    u32 gmem{};
-    u32 sampler{};
-
-    bool operator<(const BaseBindings& rhs) const {
-        return std::tie(cbuf, gmem, sampler) < std::tie(rhs.cbuf, rhs.gmem, rhs.sampler);
-    }
-};
+using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>;
+using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;

 class CachedShader final : public RasterizerCacheObject {
 public:
-    CachedShader(VAddr addr, Maxwell::ShaderProgram program_type);
+    explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
+                          ShaderDiskCacheOpenGL& disk_cache,
+                          const PrecompiledPrograms& precompiled_programs,
+                          ProgramCode&& program_code, ProgramCode&& program_code_b);
+
+    explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
+                          ShaderDiskCacheOpenGL& disk_cache,
+                          const PrecompiledPrograms& precompiled_programs,
+                          GLShader::ProgramResult result);

    VAddr GetAddr() const override {
        return addr;
@@ -65,49 +74,67 @@ private:
    // declared by the hardware. Workaround this issue by generating a different shader per input
    // topology class.
    struct GeometryPrograms {
-        OGLProgram points;
-        OGLProgram lines;
-        OGLProgram lines_adjacency;
-        OGLProgram triangles;
-        OGLProgram triangles_adjacency;
+        CachedProgram points;
+        CachedProgram lines;
+        CachedProgram lines_adjacency;
+        CachedProgram triangles;
+        CachedProgram triangles_adjacency;
    };

-    std::string AllocateBindings(BaseBindings base_bindings);
-
    GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings);

    /// Generates a geometry shader or returns one that already exists.
-    GLuint LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings,
-                               const std::string& glsl_topology, u32 max_vertices,
-                               const std::string& debug_name);
+    GLuint LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
+                               GLenum primitive_mode);

-    void CalculateProperties();
+    CachedProgram TryLoadProgram(GLenum primitive_mode, BaseBindings base_bindings) const;
+
+    ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;

    VAddr addr{};
-    std::size_t shader_length{};
+    u64 unique_identifier{};
    Maxwell::ShaderProgram program_type{};
-    GLShader::ShaderSetup setup;
+    ShaderDiskCacheOpenGL& disk_cache;
+    const PrecompiledPrograms& precompiled_programs;
+
+    std::size_t shader_length{};
    GLShader::ShaderEntries entries;

    std::string code;

-    std::map<BaseBindings, OGLProgram> programs;
-    std::map<BaseBindings, GeometryPrograms> geometry_programs;
+    std::unordered_map<BaseBindings, CachedProgram> programs;
+    std::unordered_map<BaseBindings, GeometryPrograms> geometry_programs;

-    std::map<u32, GLuint> cbuf_resource_cache;
-    std::map<u32, GLuint> gmem_resource_cache;
-    std::map<u32, GLint> uniform_cache;
+    std::unordered_map<u32, GLuint> cbuf_resource_cache;
+    std::unordered_map<u32, GLuint> gmem_resource_cache;
+    std::unordered_map<u32, GLint> uniform_cache;
 };

+/// Rasterizer cache of guest shaders for the OpenGL renderer, with an attached disk cache.
 class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
 public:
-    explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer);
+    explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system);
+
+    /// Loads disk cache for the current game
+    void LoadDiskCache(const std::atomic_bool& stop_loading,
+                       const VideoCore::DiskResourceLoadCallback& callback);

    /// Gets the current specified shader stage program
    Shader GetStageProgram(Maxwell::ShaderProgram program);

 private:
+    /// Produces the unspecialized shader of every raw entry, keyed by unique identifier
+    std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders(
+        const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
+        const std::vector<ShaderDiskCacheRaw>& raws,
+        const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled);
+
+    /// Creates a program from a dumped binary; the result is empty when the dump is unusable
+    CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
+                                             const std::set<GLenum>& supported_formats);
+
    std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
+
+    ShaderDiskCacheOpenGL disk_cache;
+    /// Decompiled results keyed by shader unique identifier
+    PrecompiledShaders precompiled_shaders;
+    /// Built programs keyed by shader usage
+    PrecompiledPrograms precompiled_programs;
 };

 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -171,7 +171,7 @@ public:
            code.AddLine(fmt::format("case 0x{:x}u: {{", address));
            ++code.scope;

-            VisitBasicBlock(bb);
+            VisitBlock(bb);

            --code.scope;
            code.AddLine('}');
@@ -193,15 +193,14 @@ public:
    ShaderEntries GetShaderEntries() const {
        ShaderEntries entries;
        for (const auto& cbuf : ir.GetConstantBuffers()) {
-            entries.const_buffers.emplace_back(cbuf.second, stage, GetConstBufferBlock(cbuf.first),
+            entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
                                               cbuf.first);
        }
        for (const auto& sampler : ir.GetSamplers()) {
-            entries.samplers.emplace_back(sampler, stage, GetSampler(sampler));
+            entries.samplers.emplace_back(sampler);
        }
        for (const auto& gmem : ir.GetGlobalMemoryBases()) {
-            entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset, stage,
-                                                       GetGlobalMemoryBlock(gmem));
+            entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset);
        }
        entries.clip_distances = ir.GetClipDistances();
        entries.shader_length = ir.GetLength();
@@ -424,7 +423,7 @@ private:
            code.AddNewLine();
    }

-    void VisitBasicBlock(const BasicBlock& bb) {
+    void VisitBlock(const NodeBlock& bb) {
        for (const Node node : bb) {
            if (const std::string expr = Visit(node); !expr.empty()) {
                code.AddLine(expr);
@@ -576,7 +575,7 @@ private:
            code.AddLine("if (" + Visit(conditional->GetCondition()) + ") {");
            ++code.scope;

-            VisitBasicBlock(conditional->GetCode());
+            VisitBlock(conditional->GetCode());

            --code.scope;
            code.AddLine('}');
@@ -617,17 +616,8 @@ private:

    std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
        std::string value = VisitOperand(operation, operand_index);
-
        switch (type) {
-        case Type::Bool:
-        case Type::Bool2:
-        case Type::Float:
-            return value;
-        case Type::Int:
-            return "ftoi(" + value + ')';
-        case Type::Uint:
-            return "ftou(" + value + ')';
-        case Type::HalfFloat:
+        case Type::HalfFloat: {
            const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta());
            if (!half_meta) {
                value = "toHalf2(" + value + ')';
@@ -644,6 +634,26 @@ private:
                return "vec2(toHalf2(" + value + ")[1])";
            }
        }
+        default:
+            return CastOperand(value, type);
+        }
+    }
+
+    /// Wraps an expression so that it is read as the requested type: ftoi()/ftou() for the
+    /// integer types, unchanged for the bool and float types.
+    std::string CastOperand(const std::string& value, Type type) const {
+        switch (type) {
+        case Type::Int:
+            return "ftoi(" + value + ')';
+        case Type::Uint:
+            return "ftou(" + value + ')';
+        case Type::Bool:
+        case Type::Bool2:
+        case Type::Float:
+            // No conversion is applied to these types
+            return value;
+        case Type::HalfFloat:
+            // Can't be handled as a stand-alone value; reported by the check below
+            break;
+        }
        UNREACHABLE();
        return value;
    }
@@ -651,6 +661,7 @@ private:
    std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) {
        switch (type) {
        case Type::Bool:
+        case Type::Bool2:
        case Type::Float:
            if (needs_parenthesis) {
                return '(' + value + ')';
@@ -720,45 +731,51 @@ private:
        constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};

        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
-        const auto count = static_cast<u32>(operation.GetOperandsCount());
        ASSERT(meta);

+        const std::size_t count = operation.GetOperandsCount();
+        const bool has_array = meta->sampler.IsArray();
+        const bool has_shadow = meta->sampler.IsShadow();
+
        std::string expr = func;
        expr += '(';
        expr += GetSampler(meta->sampler);
        expr += ", ";

-        expr += coord_constructors[meta->coords_count - 1];
+        expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
        expr += '(';
-        for (u32 i = 0; i < count; ++i) {
-            const bool is_extra = i >= meta->coords_count;
-            const bool is_array = i == meta->array_index;
+        for (std::size_t i = 0; i < count; ++i) {
+            expr += Visit(operation[i]);

-            std::string operand = [&]() {
-                if (is_extra && is_extra_int) {
-                    if (const auto immediate = std::get_if<ImmediateNode>(operation[i])) {
-                        return std::to_string(static_cast<s32>(immediate->GetValue()));
-                    } else {
-                        return "ftoi(" + Visit(operation[i]) + ')';
-                    }
-                } else {
-                    return Visit(operation[i]);
-                }
-            }();
-            if (is_array) {
-                ASSERT(!is_extra);
-                operand = "float(ftoi(" + operand + "))";
-            }
-
-            expr += operand;
-
-            if (i + 1 == meta->coords_count) {
-                expr += ')';
-            }
-            if (i + 1 < count) {
+            const std::size_t next = i + 1;
+            if (next < count || has_array || has_shadow)
                expr += ", ";
+        }
+        if (has_array) {
+            expr += "float(ftoi(" + Visit(meta->array) + "))";
+        }
+        if (has_shadow) {
+            if (has_array)
+                expr += ", ";
+            expr += Visit(meta->depth_compare);
+        }
+        expr += ')';
+
+        for (const Node extra : meta->extras) {
+            expr += ", ";
+            if (is_extra_int) {
+                if (const auto immediate = std::get_if<ImmediateNode>(extra)) {
+                    // Inline the string as an immediate integer in GLSL (some extra arguments are
+                    // required to be constant)
+                    expr += std::to_string(static_cast<s32>(immediate->GetValue()));
+                } else {
+                    expr += "ftoi(" + Visit(extra) + ')';
+                }
+            } else {
+                expr += Visit(extra);
            }
        }
+
        expr += ')';
        return expr;
    }
@@ -1135,7 +1152,7 @@ private:
                                  Type::HalfFloat);
    }

-    std::string F4Texture(Operation operation) {
+    std::string Texture(Operation operation) {
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);

@@ -1146,7 +1163,7 @@ private:
        return expr + GetSwizzle(meta->element);
    }

-    std::string F4TextureLod(Operation operation) {
+    std::string TextureLod(Operation operation) {
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);

@@ -1157,7 +1174,7 @@ private:
        return expr + GetSwizzle(meta->element);
    }

-    std::string F4TextureGather(Operation operation) {
+    std::string TextureGather(Operation operation) {
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);

@@ -1165,7 +1182,7 @@ private:
               GetSwizzle(meta->element);
    }

-    std::string F4TextureQueryDimensions(Operation operation) {
+    std::string TextureQueryDimensions(Operation operation) {
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);

@@ -1185,7 +1202,7 @@ private:
        return "0";
    }

-    std::string F4TextureQueryLod(Operation operation) {
+    std::string TextureQueryLod(Operation operation) {
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
        ASSERT(meta);

@@ -1196,29 +1213,33 @@ private:
        return "0";
    }

-    std::string F4TexelFetch(Operation operation) {
+    /// Emits a texelFetch() expression: the operands are read as integers and packed into an
+    /// int/ivecN constructor picked by the operand count, the extras are appended as further
+    /// integer arguments and the requested element is selected with a swizzle.
+    std::string TexelFetch(Operation operation) {
        constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"};
        const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
-        const auto count = static_cast<u32>(operation.GetOperandsCount());
        ASSERT(meta);
+        UNIMPLEMENTED_IF(meta->sampler.IsArray());
+        const std::size_t count = operation.GetOperandsCount();

        std::string expr = "texelFetch(";
        expr += GetSampler(meta->sampler);
        expr += ", ";

-        expr += constructors[meta->coords_count - 1];
+        expr += constructors.at(operation.GetOperandsCount() - 1);
        expr += '(';
-        for (u32 i = 0; i < count; ++i) {
+        for (std::size_t i = 0; i < count; ++i) {
            expr += VisitOperand(operation, i, Type::Int);
-
-            if (i + 1 == meta->coords_count) {
+            // Close the coordinate constructor after the last operand, separate the others
+            const std::size_t next = i + 1;
+            if (next == count)
                expr += ')';
-            }
-            if (i + 1 < count) {
+            else if (next < count)
                expr += ", ";
-            }
+        }
+        // Extras follow the coordinates as additional integer arguments
+        for (std::size_t i = 0; i < meta->extras.size(); ++i) {
+            expr += ", ";
+            expr += CastOperand(Visit(meta->extras.at(i)), Type::Int);
        }
        expr += ')';
+
        return expr + GetSwizzle(meta->element);
    }

@@ -1455,12 +1476,12 @@ private:
        &GLSLDecompiler::Logical2HNotEqual,
        &GLSLDecompiler::Logical2HGreaterEqual,

-        &GLSLDecompiler::F4Texture,
-        &GLSLDecompiler::F4TextureLod,
-        &GLSLDecompiler::F4TextureGather,
-        &GLSLDecompiler::F4TextureQueryDimensions,
-        &GLSLDecompiler::F4TextureQueryLod,
-        &GLSLDecompiler::F4TexelFetch,
+        &GLSLDecompiler::Texture,
+        &GLSLDecompiler::TextureLod,
+        &GLSLDecompiler::TextureGather,
+        &GLSLDecompiler::TextureQueryDimensions,
+        &GLSLDecompiler::TextureQueryLod,
+        &GLSLDecompiler::TexelFetch,

        &GLSLDecompiler::Branch,
        &GLSLDecompiler::PushFlowStack,
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -5,6 +5,7 @@
 #pragma once

 #include <array>
+#include <set>
 #include <string>
 #include <utility>
 #include <vector>
@@ -18,56 +19,29 @@ class ShaderIR;

 namespace OpenGL::GLShader {

+struct ShaderEntries;
+
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using ProgramResult = std::pair<std::string, ShaderEntries>;
+using SamplerEntry = VideoCommon::Shader::Sampler;

 class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
 public:
-    explicit ConstBufferEntry(const VideoCommon::Shader::ConstBuffer& entry,
-                              Maxwell::ShaderStage stage, const std::string& name, u32 index)
-        : VideoCommon::Shader::ConstBuffer{entry}, stage{stage}, name{name}, index{index} {}
-
-    const std::string& GetName() const {
-        return name;
-    }
-
-    Maxwell::ShaderStage GetStage() const {
-        return stage;
-    }
+    explicit ConstBufferEntry(u32 max_offset, bool is_indirect, u32 index)
+        : VideoCommon::Shader::ConstBuffer{max_offset, is_indirect}, index{index} {}

    u32 GetIndex() const {
        return index;
    }

 private:
-    std::string name;
-    Maxwell::ShaderStage stage{};
    u32 index{};
 };

-class SamplerEntry : public VideoCommon::Shader::Sampler {
-public:
-    explicit SamplerEntry(const VideoCommon::Shader::Sampler& entry, Maxwell::ShaderStage stage,
-                          const std::string& name)
-        : VideoCommon::Shader::Sampler{entry}, stage{stage}, name{name} {}
-
-    const std::string& GetName() const {
-        return name;
-    }
-
-    Maxwell::ShaderStage GetStage() const {
-        return stage;
-    }
-
-private:
-    std::string name;
-    Maxwell::ShaderStage stage{};
-};
-
 class GlobalMemoryEntry {
 public:
-    explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, Maxwell::ShaderStage stage,
-                               std::string name)
-        : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, stage{stage}, name{std::move(name)} {}
+    explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset)
+        : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {}

    u32 GetCbufIndex() const {
        return cbuf_index;
@@ -77,19 +51,9 @@ public:
        return cbuf_offset;
    }

-    const std::string& GetName() const {
-        return name;
-    }
-
-    Maxwell::ShaderStage GetStage() const {
-        return stage;
-    }
-
 private:
    u32 cbuf_index{};
    u32 cbuf_offset{};
-    Maxwell::ShaderStage stage{};
-    std::string name;
 };

 struct ShaderEntries {
@@ -100,8 +64,6 @@ struct ShaderEntries {
    std::size_t shader_length{};
 };

-using ProgramResult = std::pair<std::string, ShaderEntries>;
-
 std::string GetCommonDeclarations();

 ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage,
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -0,0 +1,656 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstring>
+#include <fmt/format.h>
+#include <lz4.h>
+
+#include "common/assert.h"
+#include "common/common_paths.h"
+#include "common/common_types.h"
+#include "common/file_util.h"
+#include "common/logging/log.h"
+#include "common/scm_rev.h"
+
+#include "core/core.h"
+#include "core/hle/kernel/process.h"
+#include "core/settings.h"
+
+#include "video_core/renderer_opengl/gl_shader_cache.h"
+#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
+
+namespace OpenGL {
+
+using ShaderCacheVersionHash = std::array<u8, 64>;
+
+enum class TransferableEntryKind : u32 {
+    Raw,
+    Usage,
+};
+
+enum class PrecompiledEntryKind : u32 {
+    Decompiled,
+    Dump,
+};
+
+constexpr u32 NativeVersion = 1;
+
+// Make sure these sizes don't change by accident (ShaderDiskCacheUsage is read and written as raw bytes)
+static_assert(sizeof(BaseBindings) == 12);
+static_assert(sizeof(ShaderDiskCacheUsage) == 24);
+
+namespace {
+
+/// Builds the fixed-size version hash: at most hash-size bytes of
+/// Common::g_shader_cache_version copied into a zero-initialized array.
+ShaderCacheVersionHash GetShaderCacheVersionHash() {
+    ShaderCacheVersionHash version_hash{};
+    const char* const version = Common::g_shader_cache_version;
+    const std::size_t copy_size = std::min(std::strlen(version), version_hash.size());
+    std::memcpy(version_hash.data(), version, copy_size);
+    return version_hash;
+}
+
+/// Compresses source_size bytes starting at source with LZ4.
+/// Returns the compressed bytes, or an empty vector when the input is larger than
+/// LZ4_MAX_INPUT_SIZE or LZ4 reports a failure.
+template <typename T>
+std::vector<u8> CompressData(const T* source, std::size_t source_size) {
+    if (source_size > LZ4_MAX_INPUT_SIZE) {
+        return {};
+    }
+
+    const int input_size = static_cast<int>(source_size);
+    const int bound = LZ4_compressBound(input_size);
+    std::vector<u8> output(bound);
+
+    const auto* const src = reinterpret_cast<const char*>(source);
+    auto* const dst = reinterpret_cast<char*>(output.data());
+    const int written = LZ4_compress_default(src, dst, input_size, bound);
+    if (written <= 0) {
+        return {};
+    }
+
+    // Shrink from the worst-case bound to what LZ4 actually produced
+    output.resize(written);
+    return output;
+}
+
+/// Decompresses an LZ4 buffer that is expected to expand to exactly uncompressed_size bytes.
+/// Returns an empty vector when LZ4 does not report exactly that many bytes.
+std::vector<u8> DecompressData(const std::vector<u8>& compressed, std::size_t uncompressed_size) {
+    std::vector<u8> output(uncompressed_size);
+
+    const auto* const src = reinterpret_cast<const char*>(compressed.data());
+    auto* const dst = reinterpret_cast<char*>(output.data());
+    const int src_size = static_cast<int>(compressed.size());
+    const int dst_capacity = static_cast<int>(output.size());
+
+    const int produced = LZ4_decompress_safe(src, dst, src_size, dst_capacity);
+    if (produced != static_cast<int>(uncompressed_size)) {
+        return {};
+    }
+    return output;
+}
+
+} // namespace
+
+/// Builds a raw entry from already known values. Both code vectors are taken by value and
+/// moved into the members.
+ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
+                                       u32 program_code_size, u32 program_code_size_b,
+                                       ProgramCode program_code, ProgramCode program_code_b)
+    : unique_identifier{unique_identifier}, program_type{program_type},
+      program_code_size{program_code_size}, program_code_size_b{program_code_size_b},
+      program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {}
+
+/// Creates an empty entry, meant to be filled in by Load()
+ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default;
+
+ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default;
+
+/// Reads a raw shader entry from the current position of file.
+/// Order on disk: unique identifier (u64), program type (u32), main program size (u32),
+/// program B size (u32), main program code and, only for VertexA programs, program B code.
+/// Returns false as soon as any read comes up short.
+bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
+    if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) ||
+        file.ReadBytes(&program_type, sizeof(u32)) != sizeof(u32)) {
+        return false;
+    }
+
+    // Read the sizes straight into the members. Locals with the same names used to shadow
+    // them, which left the stored sizes at zero and made Save() on a loaded entry write no code.
+    if (file.ReadBytes(&program_code_size, sizeof(u32)) != sizeof(u32) ||
+        file.ReadBytes(&program_code_size_b, sizeof(u32)) != sizeof(u32)) {
+        return false;
+    }
+
+    program_code.resize(program_code_size);
+    program_code_b.resize(program_code_size_b);
+
+    if (file.ReadArray(program_code.data(), program_code_size) != program_code_size)
+        return false;
+
+    // Program B is only stored for dual vertex shaders
+    if (HasProgramA() &&
+        file.ReadArray(program_code_b.data(), program_code_size_b) != program_code_size_b) {
+        return false;
+    }
+    return true;
+}
+
+/// Writes this entry to file in the same order Load() reads it: identifier, program type,
+/// both code sizes, the main program code and, for VertexA programs, program B code.
+/// Returns false on the first write that fails.
+bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
+    const bool header_written = file.WriteObject(unique_identifier) == 1 &&
+                                file.WriteObject(static_cast<u32>(program_type)) == 1 &&
+                                file.WriteObject(program_code_size) == 1 &&
+                                file.WriteObject(program_code_size_b) == 1;
+    if (!header_written) {
+        return false;
+    }
+
+    if (file.WriteArray(program_code.data(), program_code_size) != program_code_size) {
+        return false;
+    }
+
+    if (!HasProgramA()) {
+        return true;
+    }
+    return file.WriteArray(program_code_b.data(), program_code_size_b) == program_code_size_b;
+}
+
+/// Stores a reference to the system; it is used to query the current process' title id.
+ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
+
+/// Loads the raw shaders and their usages from the current title's transferable file.
+/// Returns an empty optional when the disk cache is disabled, the title has no id, the file is
+/// missing, its version differs from NativeVersion, or any entry fails to parse. A file older
+/// than NativeVersion is additionally deleted. Every raw entry that loads is registered in
+/// `transferable` so SaveRaw() can skip shaders that are already stored.
+std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
+ShaderDiskCacheOpenGL::LoadTransferable() {
+    // Skip games without title id
+    const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
+    if (!Settings::values.use_disk_shader_cache || !has_title_id)
+        return {};
+    tried_to_load = true;
+
+    FileUtil::IOFile file(GetTransferablePath(), "rb");
+    if (!file.IsOpen()) {
+        LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}",
+                 GetTitleID());
+        return {};
+    }
+
+    u32 version{};
+    if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
+        LOG_ERROR(Render_OpenGL,
+                  "Failed to get transferable cache version for title id={} - skipping",
+                  GetTitleID());
+        return {};
+    }
+
+    if (version < NativeVersion) {
+        LOG_INFO(Render_OpenGL, "Transferable shader cache is old - removing");
+        // Close before deleting so the file is not removed while still open
+        file.Close();
+        InvalidateTransferable();
+        return {};
+    }
+    if (version > NativeVersion) {
+        LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
+                                   "of the emulator - skipping");
+        return {};
+    }
+
+    // Version is valid, load the shaders
+    std::vector<ShaderDiskCacheRaw> raws;
+    std::vector<ShaderDiskCacheUsage> usages;
+    while (file.Tell() < file.GetSize()) {
+        TransferableEntryKind kind{};
+        if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
+            LOG_ERROR(Render_OpenGL, "Failed to read transferable file - skipping");
+            return {};
+        }
+
+        switch (kind) {
+        case TransferableEntryKind::Raw: {
+            ShaderDiskCacheRaw entry;
+            if (!entry.Load(file)) {
+                LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry - skipping");
+                return {};
+            }
+            transferable.insert({entry.GetUniqueIdentifier(), {}});
+            raws.push_back(std::move(entry));
+            break;
+        }
+        case TransferableEntryKind::Usage: {
+            ShaderDiskCacheUsage usage{};
+            if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage)) {
+                LOG_ERROR(Render_OpenGL, "Failed to load transferable usage entry - skipping");
+                return {};
+            }
+            usages.push_back(std::move(usage));
+            break;
+        }
+        default:
+            LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={} - skipping",
+                      static_cast<u32>(kind));
+            return {};
+        }
+    }
+    // Hand the vectors over instead of copying every loaded shader into the result
+    return {{std::move(raws), std::move(usages)}};
+}
+
+/// Loads the current title's precompiled cache.
+/// Returns empty maps when the cache is not usable or the file is missing. When the file exists
+/// but cannot be parsed, it is deleted and empty maps are returned.
+std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
+          std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
+ShaderDiskCacheOpenGL::LoadPrecompiled() {
+    if (!IsUsable())
+        return {};
+
+    FileUtil::IOFile file(GetPrecompiledPath(), "rb");
+    if (!file.IsOpen()) {
+        LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}",
+                 GetTitleID());
+        return {};
+    }
+
+    // Not const: the maps are moved out below
+    auto result = LoadPrecompiledFile(file);
+    if (!result) {
+        LOG_INFO(Render_OpenGL,
+                 "Failed to load precompiled cache for game with title id={} - removing",
+                 GetTitleID());
+        file.Close();
+        InvalidatePrecompiled();
+        return {};
+    }
+    // Moving avoids duplicating every decompiled source and program binary
+    return std::move(*result);
+}
+
+/// Parses an already opened precompiled cache file.
+/// The file starts with the shader cache version hash, followed by a sequence of entries, each
+/// introduced by a PrecompiledEntryKind. Returns an empty optional when the hash does not match
+/// this build or when any entry is truncated, unknown or fails to decompress.
+std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
+                        std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
+ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
+    ShaderCacheVersionHash file_hash{};
+    if (file.ReadArray(file_hash.data(), file_hash.size()) != file_hash.size()) {
+        return {};
+    }
+    if (GetShaderCacheVersionHash() != file_hash) {
+        LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
+        return {};
+    }
+
+    std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
+    std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> dumps;
+    while (file.Tell() < file.GetSize()) {
+        PrecompiledEntryKind kind{};
+        if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
+            return {};
+        }
+
+        switch (kind) {
+        case PrecompiledEntryKind::Decompiled: {
+            u64 unique_identifier{};
+            if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64))
+                return {};
+
+            // Must not be const, otherwise std::move below silently degrades to a copy
+            auto entry = LoadDecompiledEntry(file);
+            if (!entry)
+                return {};
+            decompiled.insert({unique_identifier, std::move(*entry)});
+            break;
+        }
+        case PrecompiledEntryKind::Dump: {
+            ShaderDiskCacheUsage usage;
+            if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage))
+                return {};
+
+            ShaderDiskCacheDump dump;
+            if (file.ReadBytes(&dump.binary_format, sizeof(u32)) != sizeof(u32))
+                return {};
+
+            u32 binary_length{};
+            u32 compressed_size{};
+            if (file.ReadBytes(&binary_length, sizeof(u32)) != sizeof(u32) ||
+                file.ReadBytes(&compressed_size, sizeof(u32)) != sizeof(u32)) {
+                return {};
+            }
+
+            std::vector<u8> compressed_binary(compressed_size);
+            if (file.ReadArray(compressed_binary.data(), compressed_binary.size()) !=
+                compressed_binary.size()) {
+                return {};
+            }
+
+            dump.binary = DecompressData(compressed_binary, binary_length);
+            if (dump.binary.empty()) {
+                return {};
+            }
+
+            // The dump is not used afterwards, so give the binary away instead of copying it
+            dumps.insert({usage, std::move(dump)});
+            break;
+        }
+        default:
+            return {};
+        }
+    }
+    return {{std::move(decompiled), std::move(dumps)}};
+}
+
+/// Reads one decompiled entry (everything after its unique identifier) from file.
+/// The read order mirrors SaveDecompiledFile: code sizes, compressed code, const buffers,
+/// samplers, global memory entries, clip distances and finally the shader length.
+/// Returns an empty optional on any short read or when the code fails to decompress.
+std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry(
+    FileUtil::IOFile& file) {
+    u32 code_size{};
+    u32 compressed_code_size{};
+    if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
+        file.ReadBytes(&compressed_code_size, sizeof(u32)) != sizeof(u32)) {
+        return {};
+    }
+
+    std::vector<u8> compressed_code(compressed_code_size);
+    if (file.ReadArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
+        return {};
+    }
+
+    // DecompressData returns an empty vector on failure
+    const std::vector<u8> code = DecompressData(compressed_code, code_size);
+    if (code.empty()) {
+        return {};
+    }
+    ShaderDiskCacheDecompiled entry;
+    entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size);
+
+    // Const buffers: count, then max offset, index and indirect flag for each one
+    u32 const_buffers_count{};
+    if (file.ReadBytes(&const_buffers_count, sizeof(u32)) != sizeof(u32))
+        return {};
+    for (u32 i = 0; i < const_buffers_count; ++i) {
+        u32 max_offset{};
+        u32 index{};
+        u8 is_indirect{};
+        if (file.ReadBytes(&max_offset, sizeof(u32)) != sizeof(u32) ||
+            file.ReadBytes(&index, sizeof(u32)) != sizeof(u32) ||
+            file.ReadBytes(&is_indirect, sizeof(u8)) != sizeof(u8)) {
+            return {};
+        }
+        entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index);
+    }
+
+    // Samplers: count, then offset, index, texture type and the array/shadow flags for each one
+    u32 samplers_count{};
+    if (file.ReadBytes(&samplers_count, sizeof(u32)) != sizeof(u32))
+        return {};
+    for (u32 i = 0; i < samplers_count; ++i) {
+        u64 offset{};
+        u64 index{};
+        u32 type{};
+        u8 is_array{};
+        u8 is_shadow{};
+        if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) ||
+            file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) ||
+            file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) ||
+            file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) ||
+            file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8)) {
+            return {};
+        }
+        entry.entries.samplers.emplace_back(
+            static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
+            static_cast<Tegra::Shader::TextureType>(type), is_array != 0, is_shadow != 0);
+    }
+
+    // Global memory entries: count, then const buffer index and offset for each one
+    u32 global_memory_count{};
+    if (file.ReadBytes(&global_memory_count, sizeof(u32)) != sizeof(u32))
+        return {};
+    for (u32 i = 0; i < global_memory_count; ++i) {
+        u32 cbuf_index{};
+        u32 cbuf_offset{};
+        if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) ||
+            file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32)) {
+            return {};
+        }
+        entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset);
+    }
+
+    // Clip distances have no stored count; one byte is read per element of the container
+    for (auto& clip_distance : entry.entries.clip_distances) {
+        u8 clip_distance_raw{};
+        if (file.ReadBytes(&clip_distance_raw, sizeof(u8)) != sizeof(u8))
+            return {};
+        clip_distance = clip_distance_raw != 0;
+    }
+
+    u64 shader_length{};
+    if (file.ReadBytes(&shader_length, sizeof(u64)) != sizeof(u64))
+        return {};
+    entry.entries.shader_length = static_cast<std::size_t>(shader_length);
+
+    return entry;
+}
+
+/// Writes one decompiled entry to file: entry kind, unique identifier, uncompressed and
+/// compressed code sizes, the compressed code and then the shader entries in the order
+/// LoadDecompiledEntry reads them back. Booleans are stored as one byte each.
+/// Returns false on the first write that fails.
+bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier,
+                                               const std::string& code,
+                                               const std::vector<u8>& compressed_code,
+                                               const GLShader::ShaderEntries& entries) {
+    if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Decompiled)) != 1 ||
+        file.WriteObject(unique_identifier) != 1 ||
+        file.WriteObject(static_cast<u32>(code.size())) != 1 ||
+        file.WriteObject(static_cast<u32>(compressed_code.size())) != 1 ||
+        file.WriteArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
+        return false;
+    }
+
+    // Const buffers: count followed by max offset, index and indirect flag
+    if (file.WriteObject(static_cast<u32>(entries.const_buffers.size())) != 1)
+        return false;
+    for (const auto& cbuf : entries.const_buffers) {
+        if (file.WriteObject(static_cast<u32>(cbuf.GetMaxOffset())) != 1 ||
+            file.WriteObject(static_cast<u32>(cbuf.GetIndex())) != 1 ||
+            file.WriteObject(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0)) != 1) {
+            return false;
+        }
+    }
+
+    // Samplers: count followed by offset, index, type and the array/shadow flags
+    if (file.WriteObject(static_cast<u32>(entries.samplers.size())) != 1)
+        return false;
+    for (const auto& sampler : entries.samplers) {
+        if (file.WriteObject(static_cast<u64>(sampler.GetOffset())) != 1 ||
+            file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 ||
+            file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 ||
+            file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 ||
+            file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1) {
+            return false;
+        }
+    }
+
+    // Global memory entries: count followed by const buffer index and offset
+    if (file.WriteObject(static_cast<u32>(entries.global_memory_entries.size())) != 1)
+        return false;
+    for (const auto& gmem : entries.global_memory_entries) {
+        if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 ||
+            file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1) {
+            return false;
+        }
+    }
+
+    // Clip distances are written without a count, one byte per element
+    for (const bool clip_distance : entries.clip_distances) {
+        if (file.WriteObject(static_cast<u8>(clip_distance ? 1 : 0)) != 1)
+            return false;
+    }
+
+    return file.WriteObject(static_cast<u64>(entries.shader_length)) == 1;
+}
+
+/// Deletes the transferable file and then the precompiled file as well.
+/// A failed deletion is only logged.
+void ShaderDiskCacheOpenGL::InvalidateTransferable() const {
+    const bool removed = FileUtil::Delete(GetTransferablePath());
+    if (!removed) {
+        LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
+                  GetTransferablePath());
+    }
+    InvalidatePrecompiled();
+}
+
+/// Deletes the precompiled file. A failed deletion is only logged.
+void ShaderDiskCacheOpenGL::InvalidatePrecompiled() const {
+    const bool removed = FileUtil::Delete(GetPrecompiledPath());
+    if (removed) {
+        return;
+    }
+    LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath());
+}
+
+/// Appends a raw shader to the transferable file unless a shader with the same identifier is
+/// already registered. If writing fails, the transferable cache is invalidated.
+void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
+    if (!IsUsable()) {
+        return;
+    }
+
+    const u64 unique_identifier = entry.GetUniqueIdentifier();
+    if (transferable.count(unique_identifier) != 0) {
+        // Already stored, nothing to do
+        return;
+    }
+
+    FileUtil::IOFile file = AppendTransferableFile();
+    if (!file.IsOpen()) {
+        return;
+    }
+
+    const bool saved = file.WriteObject(TransferableEntryKind::Raw) == 1 && entry.Save(file);
+    if (!saved) {
+        LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry - removing");
+        file.Close();
+        InvalidateTransferable();
+        return;
+    }
+    transferable.insert({unique_identifier, {}});
+}
+
+/// Registers a usage for an already stored raw shader and appends it to the transferable file.
+/// The usage is recorded in memory before the file is written. If writing fails, the
+/// transferable cache is invalidated.
+void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
+    if (!IsUsable()) {
+        return;
+    }
+
+    const auto it = transferable.find(usage.unique_identifier);
+    ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
+
+    auto& usages = it->second;
+    ASSERT(usages.find(usage) == usages.end());
+    usages.insert(usage);
+
+    FileUtil::IOFile file = AppendTransferableFile();
+    if (!file.IsOpen()) {
+        return;
+    }
+
+    const bool saved =
+        file.WriteObject(TransferableEntryKind::Usage) == 1 && file.WriteObject(usage) == 1;
+    if (saved) {
+        return;
+    }
+    LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry - removing");
+    file.Close();
+    InvalidateTransferable();
+}
+
+/// Compresses the generated GLSL code and appends it, together with its shader entries, to the
+/// precompiled file. A compression failure skips the shader; a write failure invalidates the
+/// precompiled cache.
+void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::string& code,
+                                           const GLShader::ShaderEntries& entries) {
+    if (!IsUsable()) {
+        return;
+    }
+
+    const std::vector<u8> packed_code = CompressData(code.data(), code.size());
+    if (packed_code.empty()) {
+        LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}",
+                  unique_identifier);
+        return;
+    }
+
+    FileUtil::IOFile file = AppendPrecompiledFile();
+    if (!file.IsOpen()) {
+        return;
+    }
+
+    const bool saved = SaveDecompiledFile(file, unique_identifier, code, packed_code, entries);
+    if (saved) {
+        return;
+    }
+    LOG_ERROR(Render_OpenGL,
+              "Failed to save decompiled entry to the precompiled file - removing");
+    file.Close();
+    InvalidatePrecompiled();
+}
+
+/// Retrieves the binary of an OpenGL program, compresses it and appends it to the precompiled
+/// file as a Dump entry: kind, usage, binary format, uncompressed length, compressed length and
+/// the compressed bytes. A compression failure skips the dump; a write failure invalidates the
+/// precompiled cache.
+void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) {
+    if (!IsUsable())
+        return;
+
+    // Ask the driver how large the binary is before allocating the buffer for it
+    GLint binary_length{};
+    glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
+
+    GLenum binary_format{};
+    std::vector<u8> binary(binary_length);
+    glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
+
+    const std::vector<u8> compressed_binary = CompressData(binary.data(), binary.size());
+    if (compressed_binary.empty()) {
+        LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}",
+                  usage.unique_identifier);
+        return;
+    }
+
+    FileUtil::IOFile file = AppendPrecompiledFile();
+    if (!file.IsOpen())
+        return;
+
+    if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Dump)) != 1 ||
+        file.WriteObject(usage) != 1 || file.WriteObject(static_cast<u32>(binary_format)) != 1 ||
+        file.WriteObject(static_cast<u32>(binary_length)) != 1 ||
+        file.WriteObject(static_cast<u32>(compressed_binary.size())) != 1 ||
+        file.WriteArray(compressed_binary.data(), compressed_binary.size()) !=
+            compressed_binary.size()) {
+        LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing",
+                  usage.unique_identifier);
+        file.Close();
+        InvalidatePrecompiled();
+        return;
+    }
+}
+
+/// The cache is usable once a transferable load has been attempted and while the disk shader
+/// cache setting is enabled.
+bool ShaderDiskCacheOpenGL::IsUsable() const {
+    if (!tried_to_load) {
+        return false;
+    }
+    return Settings::values.use_disk_shader_cache;
+}
+
+/// Opens the current title's transferable file for appending, creating the cache directories
+/// first. A new or empty file gets the cache version written as its header.
+/// Returns a default-constructed file on failure.
+FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
+    if (!EnsureDirectories()) {
+        return {};
+    }
+
+    const std::string path = GetTransferablePath();
+    const bool already_present = FileUtil::Exists(path);
+
+    FileUtil::IOFile file(path, "ab");
+    if (!file.IsOpen()) {
+        LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", path);
+        return {};
+    }
+
+    // A new or empty file has no header yet, so start it with the cache version
+    const bool needs_header = !already_present || file.GetSize() == 0;
+    if (needs_header && file.WriteObject(NativeVersion) != 1) {
+        LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}",
+                  path);
+        return {};
+    }
+    return file;
+}
+
+/// Opens the current title's precompiled file for appending, creating the cache directories
+/// first. A new or empty file gets the shader cache version hash written as its header.
+/// Returns a default-constructed file on failure.
+FileUtil::IOFile ShaderDiskCacheOpenGL::AppendPrecompiledFile() const {
+    if (!EnsureDirectories()) {
+        return {};
+    }
+
+    const std::string path = GetPrecompiledPath();
+    const bool already_present = FileUtil::Exists(path);
+
+    FileUtil::IOFile file(path, "ab");
+    if (!file.IsOpen()) {
+        LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", path);
+        return {};
+    }
+
+    const bool needs_header = !already_present || file.GetSize() == 0;
+    if (!needs_header) {
+        return file;
+    }
+
+    const ShaderCacheVersionHash version_hash = GetShaderCacheVersionHash();
+    if (file.WriteArray(version_hash.data(), version_hash.size()) != version_hash.size()) {
+        LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version hash in path={}",
+                  path);
+        return {};
+    }
+    return file;
+}
+
+/// Creates the shader directory, the OpenGL base directory and the transferable and precompiled
+/// directories, in that order. Stops at the first directory that cannot be created, logs it and
+/// returns false.
+bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
+    const auto make_dir = [](const std::string& path) {
+        if (FileUtil::CreateDir(path)) {
+            return true;
+        }
+        LOG_ERROR(Render_OpenGL, "Failed to create directory={}", path);
+        return false;
+    };
+
+    if (!make_dir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir))) {
+        return false;
+    }
+    if (!make_dir(GetBaseDir())) {
+        return false;
+    }
+    if (!make_dir(GetTransferableDir())) {
+        return false;
+    }
+    return make_dir(GetPrecompiledDir());
+}
+
+/// Sanitized path of the current title's transferable file: <transferable dir>/<title id>.bin
+std::string ShaderDiskCacheOpenGL::GetTransferablePath() const {
+    const std::string raw_path = GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin";
+    return FileUtil::SanitizePath(raw_path);
+}
+
+/// Sanitized path of the current title's precompiled file: <precompiled dir>/<title id>.bin
+std::string ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
+    const std::string raw_path = GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin";
+    return FileUtil::SanitizePath(raw_path);
+}
+
+/// Directory holding transferable caches, located under the OpenGL base directory
+std::string ShaderDiskCacheOpenGL::GetTransferableDir() const {
+    std::string dir = GetBaseDir();
+    dir += DIR_SEP "transferable";
+    return dir;
+}
+
+/// Directory holding precompiled caches, located under the OpenGL base directory
+std::string ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
+    std::string dir = GetBaseDir();
+    dir += DIR_SEP "precompiled";
+    return dir;
+}
+
+/// OpenGL cache directory inside the user's shader directory
+std::string ShaderDiskCacheOpenGL::GetBaseDir() const {
+    std::string dir = FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir);
+    dir += DIR_SEP "opengl";
+    return dir;
+}
+
+/// Title id of the current process formatted as 16 uppercase, zero-padded hex digits
+std::string ShaderDiskCacheOpenGL::GetTitleID() const {
+    const auto title_id = system.CurrentProcess()->GetTitleID();
+    return fmt::format("{:016X}", title_id);
+}
+
+} // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -0,0 +1,245 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <optional>
+#include <string>
+#include <tuple>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include <glad/glad.h>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_opengl/gl_shader_gen.h"
+
+namespace Core {
+class System;
+}
+
+namespace FileUtil {
+class IOFile;
+}
+
+namespace OpenGL {
+
+using ProgramCode = std::vector<u64>;
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+/// Binding counters (const buffers, global memory and samplers) allocated by an OpenGL shader
+/// program. Two values are equal when all three counters match.
+struct BaseBindings {
+    u32 cbuf{};
+    u32 gmem{};
+    u32 sampler{};
+
+    bool operator==(const BaseBindings& rhs) const {
+        return cbuf == rhs.cbuf && gmem == rhs.gmem && sampler == rhs.sampler;
+    }
+
+    bool operator!=(const BaseBindings& rhs) const {
+        return !(*this == rhs);
+    }
+};
+
+/// Describes one way a shader is used: which shader, with which base bindings and for which
+/// primitive. Two usages are equal when all three match.
+struct ShaderDiskCacheUsage {
+    u64 unique_identifier{};
+    BaseBindings bindings;
+    GLenum primitive{};
+
+    bool operator==(const ShaderDiskCacheUsage& rhs) const {
+        return unique_identifier == rhs.unique_identifier && bindings == rhs.bindings &&
+               primitive == rhs.primitive;
+    }
+
+    bool operator!=(const ShaderDiskCacheUsage& rhs) const {
+        return !(*this == rhs);
+    }
+};
+
+} // namespace OpenGL
+
+namespace std {
+
+/// Hash for BaseBindings: ORs the three counters together, shifting gmem by 8 bits and sampler
+/// by 16 bits.
+template <>
+struct hash<OpenGL::BaseBindings> {
+    std::size_t operator()(const OpenGL::BaseBindings& bindings) const {
+        const auto gmem_bits = bindings.gmem << 8;
+        const auto sampler_bits = bindings.sampler << 16;
+        return bindings.cbuf | gmem_bits | sampler_bits;
+    }
+};
+
+/// Hash for ShaderDiskCacheUsage: XORs the unique identifier, the hash of the bindings and the
+/// primitive shifted left by 16 bits.
+template <>
+struct hash<OpenGL::ShaderDiskCacheUsage> {
+    std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const {
+        const auto identifier_bits = static_cast<std::size_t>(usage.unique_identifier);
+        const std::size_t bindings_hash = std::hash<OpenGL::BaseBindings>()(usage.bindings);
+        const auto primitive_bits = usage.primitive << 16;
+        return identifier_bits ^ bindings_hash ^ primitive_bits;
+    }
+};
+
+} // namespace std
+
+namespace OpenGL {
+
+/// Describes a shader as it is used by the guest GPU: its identifier, program type and code
+class ShaderDiskCacheRaw {
+public:
+    explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
+                                u32 program_code_size, u32 program_code_size_b,
+                                ProgramCode program_code, ProgramCode program_code_b);
+    ShaderDiskCacheRaw();
+    ~ShaderDiskCacheRaw();
+
+    /// Reads the entry from the passed file. Returns true on success.
+    bool Load(FileUtil::IOFile& file);
+
+    /// Writes the entry to the passed file. Returns true on success.
+    bool Save(FileUtil::IOFile& file) const;
+
+    u64 GetUniqueIdentifier() const {
+        return unique_identifier;
+    }
+
+    /// Returns true for VertexA programs, the only ones that carry a second program (B)
+    bool HasProgramA() const {
+        return program_type == Maxwell::ShaderProgram::VertexA;
+    }
+
+    Maxwell::ShaderProgram GetProgramType() const {
+        return program_type;
+    }
+
+    /// Maps the program type to its shader stage; VertexA and VertexB both map to Vertex
+    Maxwell::ShaderStage GetProgramStage() const {
+        switch (program_type) {
+        case Maxwell::ShaderProgram::VertexA:
+        case Maxwell::ShaderProgram::VertexB:
+            return Maxwell::ShaderStage::Vertex;
+        case Maxwell::ShaderProgram::TesselationControl:
+            return Maxwell::ShaderStage::TesselationControl;
+        case Maxwell::ShaderProgram::TesselationEval:
+            return Maxwell::ShaderStage::TesselationEval;
+        case Maxwell::ShaderProgram::Geometry:
+            return Maxwell::ShaderStage::Geometry;
+        case Maxwell::ShaderProgram::Fragment:
+            return Maxwell::ShaderStage::Fragment;
+        }
+        UNREACHABLE();
+        // Never flow off the end of a non-void function, even for an invalid program type
+        return {};
+    }
+
+    const ProgramCode& GetProgramCode() const {
+        return program_code;
+    }
+
+    const ProgramCode& GetProgramCodeB() const {
+        return program_code_b;
+    }
+
+private:
+    u64 unique_identifier{};
+    Maxwell::ShaderProgram program_type{};
+    u32 program_code_size{};
+    u32 program_code_size_b{};
+
+    ProgramCode program_code;
+    ProgramCode program_code_b;
+};
+
+/// Contains decompiled data from a shader
+struct ShaderDiskCacheDecompiled {
+    // Decompiled shader source code
+    std::string code;
+    // Shader entries (const buffers, samplers, global memory, ...) stored with the code
+    GLShader::ShaderEntries entries;
+};
+
+/// Contains an OpenGL dumped binary program
+struct ShaderDiskCacheDump {
+    // Binary format reported by glGetProgramBinary; value-initialized like the other members
+    // in this file so a default-constructed dump never holds an indeterminate value
+    GLenum binary_format{};
+    // Uncompressed program binary
+    std::vector<u8> binary;
+};
+
+/// Manages the on-disk shader caches of the current title: a transferable file with raw guest
+/// shaders and their usages, and a precompiled file with decompiled code and program binaries.
+class ShaderDiskCacheOpenGL {
+public:
+    explicit ShaderDiskCacheOpenGL(Core::System& system);
+
+    /// Loads transferable cache. A file with an old version is deleted; any other failure only
+    /// skips loading and returns empty.
+    std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
+    LoadTransferable();
+
+    /// Loads current game's precompiled cache. Invalidates on failure.
+    std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
+              std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
+    LoadPrecompiled();
+
+    /// Removes the transferable (and precompiled) cache file.
+    void InvalidateTransferable() const;
+
+    /// Removes the precompiled cache file.
+    void InvalidatePrecompiled() const;
+
+    /// Saves a raw dump to the transferable file. Checks for collisions.
+    void SaveRaw(const ShaderDiskCacheRaw& entry);
+
+    /// Saves shader usage to the transferable file. Does not check for collisions.
+    void SaveUsage(const ShaderDiskCacheUsage& usage);
+
+    /// Saves a decompiled entry to the precompiled file. Does not check for collisions.
+    void SaveDecompiled(u64 unique_identifier, const std::string& code,
+                        const GLShader::ShaderEntries& entries);
+
+    /// Saves a dump entry to the precompiled file. Does not check for collisions.
+    void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);
+
+private:
+    /// Loads the precompiled cache from the passed file. Returns empty on failure.
+    std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
+                            std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
+    LoadPrecompiledFile(FileUtil::IOFile& file);
+
+    /// Loads a decompiled cache entry from the passed file. Returns empty on failure.
+    std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry(FileUtil::IOFile& file);
+
+    /// Saves a decompiled entry to the passed file. Returns true on success.
+    bool SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier, const std::string& code,
+                            const std::vector<u8>& compressed_code,
+                            const GLShader::ShaderEntries& entries);
+
+    /// Returns if the cache can be used
+    bool IsUsable() const;
+
+    /// Opens current game's transferable file and writes its header if it doesn't exist
+    FileUtil::IOFile AppendTransferableFile() const;
+
+    /// Opens current game's precompiled file and writes its header if it doesn't exist
+    FileUtil::IOFile AppendPrecompiledFile() const;
+
+    /// Create shader disk cache directories. Returns true on success.
+    bool EnsureDirectories() const;
+
+    /// Gets current game's transferable file path
+    std::string GetTransferablePath() const;
+
+    /// Gets current game's precompiled file path
+    std::string GetPrecompiledPath() const;
+
+    /// Get user's transferable directory path
+    std::string GetTransferableDir() const;
+
+    /// Get user's precompiled directory path
+    std::string GetPrecompiledDir() const;
+
+    /// Get user's shader directory path
+    std::string GetBaseDir() const;
+
+    /// Get current game's title id
+    std::string GetTitleID() const;
+
+    // Core system
+    Core::System& system;
+    // Stored transferable shaders, keyed by unique identifier. This header includes
+    // <unordered_map> but not <map>, and the entries are only ever looked up by key.
+    std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
+    // The cache has been loaded at boot
+    bool tried_to_load{};
+};
+
+} // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -26,12 +26,10 @@ struct ShaderSetup {
        ProgramCode code;
        ProgramCode code_b; // Used for dual vertex shaders
        u64 unique_identifier;
-        std::size_t real_size;
-        std::size_t real_size_b;
    } program;

    /// Used in scenarios where we have a dual vertex shaders
-    void SetProgramB(ProgramCode&& program_b) {
+    void SetProgramB(ProgramCode program_b) {
        program.code_b = std::move(program_b);
        has_program_b = true;
    }
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -47,7 +47,7 @@ GLuint LoadShader(const char* source, GLenum type);
 * @returns Handle of the newly created OpenGL program object
 */
 template <typename... T>
-GLuint LoadProgram(bool separable_program, T... shaders) {
+GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) {
    // Link the program
    LOG_DEBUG(Render_OpenGL, "Linking program...");

@@ -58,6 +58,9 @@ GLuint LoadProgram(bool separable_program, T... shaders) {
    if (separable_program) {
        glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
    }
+    if (hint_retrievable) {
+        glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
+    }

    glLinkProgram(program_id);

--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -462,29 +462,35 @@ void OpenGLState::ApplyPolygonOffset() const {
 }

 void OpenGLState::ApplyTextures() const {
+    bool has_delta{};
+    std::size_t first{};
+    std::size_t last{};
+    std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures;
+
    for (std::size_t i = 0; i < std::size(texture_units); ++i) {
        const auto& texture_unit = texture_units[i];
        const auto& cur_state_texture_unit = cur_state.texture_units[i];
+        textures[i] = texture_unit.texture;

-        if (texture_unit.texture != cur_state_texture_unit.texture) {
-            glActiveTexture(TextureUnits::MaxwellTexture(static_cast<int>(i)).Enum());
-            glBindTexture(texture_unit.target, texture_unit.texture);
-        }
-        // Update the texture swizzle
-        if (texture_unit.swizzle.r != cur_state_texture_unit.swizzle.r ||
-            texture_unit.swizzle.g != cur_state_texture_unit.swizzle.g ||
-            texture_unit.swizzle.b != cur_state_texture_unit.swizzle.b ||
-            texture_unit.swizzle.a != cur_state_texture_unit.swizzle.a) {
-            std::array<GLint, 4> mask = {texture_unit.swizzle.r, texture_unit.swizzle.g,
-                                         texture_unit.swizzle.b, texture_unit.swizzle.a};
-            glTexParameteriv(texture_unit.target, GL_TEXTURE_SWIZZLE_RGBA, mask.data());
+        if (textures[i] != cur_state_texture_unit.texture) {
+            if (!has_delta) {
+                first = i;
+                has_delta = true;
+            }
+            last = i;
        }
    }
+
+    if (has_delta) {
+        glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
+                       textures.data());
+    }
 }

 void OpenGLState::ApplySamplers() const {
    bool has_delta{};
-    std::size_t first{}, last{};
+    std::size_t first{};
+    std::size_t last{};
    std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers;
    for (std::size_t i = 0; i < std::size(samplers); ++i) {
        samplers[i] = texture_units[i].sampler;
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -126,26 +126,14 @@ public:
    struct TextureUnit {
        GLuint texture; // GL_TEXTURE_BINDING_2D
        GLuint sampler; // GL_SAMPLER_BINDING
-        GLenum target;
-        struct {
-            GLint r; // GL_TEXTURE_SWIZZLE_R
-            GLint g; // GL_TEXTURE_SWIZZLE_G
-            GLint b; // GL_TEXTURE_SWIZZLE_B
-            GLint a; // GL_TEXTURE_SWIZZLE_A
-        } swizzle;

        void Unbind() {
            texture = 0;
-            swizzle.r = GL_RED;
-            swizzle.g = GL_GREEN;
-            swizzle.b = GL_BLUE;
-            swizzle.a = GL_ALPHA;
        }

        void Reset() {
            Unbind();
            sampler = 0;
-            target = GL_TEXTURE_2D;
        }
    };
    std::array<TextureUnit, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_units;
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -98,8 +98,8 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons
    return matrix;
 }

-RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window)
-    : VideoCore::RendererBase{window} {}
+RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system)
+    : VideoCore::RendererBase{window}, system{system} {}

 RendererOpenGL::~RendererOpenGL() = default;

@@ -171,10 +171,6 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
                                       Memory::GetPointer(framebuffer_addr),
                                       gl_framebuffer_data.data(), true);

-        state.texture_units[0].texture = screen_info.texture.resource.handle;
-        state.Apply();
-
-        glActiveTexture(GL_TEXTURE0);
        glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));

        // Update existing texture
@@ -182,14 +178,11 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
        //       they differ from the LCD resolution.
        // TODO: Applications could theoretically crash yuzu here by specifying too large
        //       framebuffer sizes. We should make sure that this cannot happen.
-        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
-                        screen_info.texture.gl_format, screen_info.texture.gl_type,
-                        gl_framebuffer_data.data());
+        glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width,
+                            framebuffer.height, screen_info.texture.gl_format,
+                            screen_info.texture.gl_type, gl_framebuffer_data.data());

        glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
-
-        state.texture_units[0].texture = 0;
-        state.Apply();
    }
 }

@@ -199,17 +192,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
 */
 void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
                                                const TextureInfo& texture) {
-    state.texture_units[0].texture = texture.resource.handle;
-    state.Apply();
-
-    glActiveTexture(GL_TEXTURE0);
-    u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
-
-    // Update existing texture
-    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
-
-    state.texture_units[0].texture = 0;
-    state.Apply();
+    const u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
+    glClearTexImage(texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
 }

 /**
@@ -249,26 +233,13 @@ void RendererOpenGL::InitOpenGLObjects() {
                              sizeof(ScreenRectVertex));

    // Allocate textures for the screen
-    screen_info.texture.resource.Create();
+    screen_info.texture.resource.Create(GL_TEXTURE_2D);

-    // Allocation of storage is deferred until the first frame, when we
-    // know the framebuffer size.
-
-    state.texture_units[0].texture = screen_info.texture.resource.handle;
-    state.Apply();
-
-    glActiveTexture(GL_TEXTURE0);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+    const GLuint texture = screen_info.texture.resource.handle;
+    glTextureStorage2D(texture, 1, GL_RGBA8, 1, 1);

    screen_info.display_texture = screen_info.texture.resource.handle;

-    state.texture_units[0].texture = 0;
-    state.Apply();
-
    // Clear screen to black
    LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
 }
@@ -279,25 +250,24 @@ void RendererOpenGL::CreateRasterizer() {
    }
    // Initialize sRGB Usage
    OpenGLState::ClearsRGBUsed();
-    rasterizer = std::make_unique<RasterizerOpenGL>(render_window, screen_info);
+    rasterizer = std::make_unique<RasterizerOpenGL>(render_window, system, screen_info);
 }

 void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
                                                 const Tegra::FramebufferConfig& framebuffer) {
-
    texture.width = framebuffer.width;
    texture.height = framebuffer.height;

    GLint internal_format;
    switch (framebuffer.pixel_format) {
    case Tegra::FramebufferConfig::PixelFormat::ABGR8:
-        internal_format = GL_RGBA;
+        internal_format = GL_RGBA8;
        texture.gl_format = GL_RGBA;
        texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
        gl_framebuffer_data.resize(texture.width * texture.height * 4);
        break;
    default:
-        internal_format = GL_RGBA;
+        internal_format = GL_RGBA8;
        texture.gl_format = GL_RGBA;
        texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
        gl_framebuffer_data.resize(texture.width * texture.height * 4);
@@ -306,15 +276,9 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
        UNREACHABLE();
    }

-    state.texture_units[0].texture = texture.resource.handle;
-    state.Apply();
-
-    glActiveTexture(GL_TEXTURE0);
-    glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
-                 texture.gl_format, texture.gl_type, nullptr);
-
-    state.texture_units[0].texture = 0;
-    state.Apply();
+    texture.resource.Release();
+    texture.resource.Create(GL_TEXTURE_2D);
+    glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height);
 }

 void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w,
@@ -356,7 +320,6 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
    }};

    state.texture_units[0].texture = screen_info.display_texture;
-    state.texture_units[0].swizzle = {GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
    // Workaround brigthness problems in SMO by enabling sRGB in the final output
    // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987
    state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -12,6 +12,10 @@
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_state.h"

+namespace Core {
+class System;
+}
+
 namespace Core::Frontend {
 class EmuWindow;
 }
@@ -41,7 +45,7 @@ struct ScreenInfo {

 class RendererOpenGL : public VideoCore::RendererBase {
 public:
-    explicit RendererOpenGL(Core::Frontend::EmuWindow& window);
+    explicit RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system);
    ~RendererOpenGL() override;

    /// Swap buffers (render frame)
@@ -72,6 +76,8 @@ private:
    void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
                                    const TextureInfo& texture);

+    Core::System& system;
+
    OpenGLState state;

    // OpenGL object IDs
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -121,15 +121,15 @@ ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {
    return exit_method = ExitMethod::AlwaysReturn;
 }

-BasicBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
-    BasicBlock basic_block;
+NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
+    NodeBlock basic_block;
    for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
        pc = DecodeInstr(basic_block, pc);
    }
    return basic_block;
 }

-u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
    // Ignore sched instructions when generating code.
    if (IsSchedInstruction(pc, main_offset)) {
        return pc + 1;
@@ -151,39 +151,38 @@ u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {
    UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
                         "NeverExecute predicate not implemented");

-    static const std::map<OpCode::Type, u32 (ShaderIR::*)(BasicBlock&, const BasicBlock&, u32)>
-        decoders = {
-            {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
-            {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
-            {OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
-            {OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
-            {OpCode::Type::Shift, &ShaderIR::DecodeShift},
-            {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
-            {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
-            {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
-            {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
-            {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
-            {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
-            {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
-            {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
-            {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
-            {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
-            {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
-            {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
-            {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
-            {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
-            {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
-            {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
-            {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
-            {OpCode::Type::Video, &ShaderIR::DecodeVideo},
-            {OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
-        };
+    static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = {
+        {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
+        {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
+        {OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
+        {OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
+        {OpCode::Type::Shift, &ShaderIR::DecodeShift},
+        {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
+        {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
+        {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
+        {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
+        {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
+        {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
+        {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
+        {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
+        {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
+        {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
+        {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
+        {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
+        {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
+        {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
+        {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
+        {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
+        {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
+        {OpCode::Type::Video, &ShaderIR::DecodeVideo},
+        {OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
+    };

    std::vector<Node> tmp_block;
    if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) {
-        pc = (this->*decoder->second)(tmp_block, bb, pc);
+        pc = (this->*decoder->second)(tmp_block, pc);
    } else {
-        pc = DecodeOther(tmp_block, bb, pc);
+        pc = DecodeOther(tmp_block, pc);
    }

    // Some instructions (like SSY) don't have a predicate field, they are always unconditionally
@@ -192,11 +191,14 @@ u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {
    const auto pred_index = static_cast<u32>(instr.pred.pred_index);

    if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) {
-        bb.push_back(
-            Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)));
+        const Node conditional =
+            Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block));
+        global_code.push_back(conditional);
+        bb.push_back(conditional);
    } else {
        for (auto& node : tmp_block) {
-            bb.push_back(std::move(node));
+            global_code.push_back(node);
+            bb.push_back(node);
        }
    }

--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::SubOp;

-u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

--- a/src/video_core/shader/decode/arithmetic_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_immediate.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -15,7 +15,7 @@ using Tegra::Shader::OpCode;
 using Tegra::Shader::Pred;
 using Tegra::Shader::Register;

-u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

@@ -41,7 +41,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u3

        const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);

-        SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc);
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
@@ -242,7 +242,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u3
    return pc;
 }

-void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
+void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
                                    Node imm_lut, bool sets_cc) {
    constexpr u32 lop_iterations = 32;
    const Node one = Immediate(1);
@@ -284,4 +284,4 @@ void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, No
    SetRegister(bb, dest, value);
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -16,7 +16,7 @@ using Tegra::Shader::Pred;
 using Tegra::Shader::PredicateResultMode;
 using Tegra::Shader::Register;

-u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

@@ -54,9 +54,9 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock&
    return pc;
 }

-void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op,
-                                   Node op_a, Node op_b, PredicateResultMode predicate_mode,
-                                   Pred predicate, bool sets_cc) {
+void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a,
+                                   Node op_b, PredicateResultMode predicate_mode, Pred predicate,
+                                   bool sets_cc) {
    const Node result = [&]() {
        switch (logic_op) {
        case LogicOperation::And:
--- a/src/video_core/shader/decode/bfe.cpp
+++ b/src/video_core/shader/decode/bfe.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

--- a/src/video_core/shader/decode/bfi.cpp
+++ b/src/video_core/shader/decode/bfi.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;

-u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

@@ -118,8 +118,8 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {

        value = [&]() {
            switch (instr.conversion.f2i.rounding) {
-            case Tegra::Shader::F2iRoundingOp::None:
-                return value;
+            case Tegra::Shader::F2iRoundingOp::RoundEven:
+                return Operation(OperationCode::FRoundEven, PRECISE, value);
            case Tegra::Shader::F2iRoundingOp::Floor:
                return Operation(OperationCode::FFloor, PRECISE, value);
            case Tegra::Shader::F2iRoundingOp::Ceil:
@@ -146,4 +146,4 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

--- a/src/video_core/shader/decode/float_set.cpp
+++ b/src/video_core/shader/decode/float_set.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ b/src/video_core/shader/decode/float_set_predicate.cpp
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Pred;

-u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -14,7 +14,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Pred;

-u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -16,7 +16,7 @@ using Tegra::Shader::HalfType;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

--- a/src/video_core/shader/decode/integer_set.cpp
+++ b/src/video_core/shader/decode/integer_set.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ b/src/video_core/shader/decode/integer_set_predicate.cpp
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Pred;

-u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -36,7 +36,7 @@ static std::size_t GetCoordCount(TextureType texture_type) {
    }
 }

-u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

@@ -160,7 +160,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
        }();

        const Node addr_register = GetRegister(instr.gpr8);
-        const Node base_address = TrackCbuf(addr_register, code, static_cast<s64>(code.size()));
+        const Node base_address =
+            TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
        const auto cbuf = std::get_if<CbufNode>(base_address);
        ASSERT(cbuf != nullptr);
        const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
@@ -305,7 +306,6 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    case OpCode::Id::TLD4S: {
        UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
                             "AOFFI is not implemented");
-
        if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
            LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
        }
@@ -314,9 +314,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
        const Node op_a = GetRegister(instr.gpr8);
        const Node op_b = GetRegister(instr.gpr20);

-        std::vector<Node> coords;
-
        // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
+        std::vector<Node> coords;
        if (depth_compare) {
            // Note: TLD4S coordinate encoding works just like TEXS's
            const Node op_y = GetRegister(instr.gpr8.Value() + 1);
@@ -327,18 +326,17 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
            coords.push_back(op_a);
            coords.push_back(op_b);
        }
-        const auto num_coords = static_cast<u32>(coords.size());
-        coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
+        std::vector<Node> extras;
+        extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));

        const auto& sampler =
            GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);

        Node4 values;
        for (u32 element = 0; element < values.size(); ++element) {
-            auto params = coords;
-            MetaTexture meta{sampler, element, num_coords};
-            values[element] =
-                Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
+            auto coords_copy = coords;
+            MetaTexture meta{sampler, {}, {}, extras, element};
+            values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
        }

        WriteTexsInstructionFloat(bb, instr, values);
@@ -359,12 +357,13 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
        switch (instr.txq.query_type) {
        case Tegra::Shader::TextureQueryType::Dimension: {
            for (u32 element = 0; element < 4; ++element) {
-                if (instr.txq.IsComponentEnabled(element)) {
-                    MetaTexture meta{sampler, element};
-                    const Node value = Operation(OperationCode::F4TextureQueryDimensions,
-                                                 std::move(meta), GetRegister(instr.gpr8));
-                    SetTemporal(bb, indexer++, value);
+                if (!instr.txq.IsComponentEnabled(element)) {
+                    continue;
                }
+                MetaTexture meta{sampler, {}, {}, {}, element};
+                const Node value =
+                    Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
+                SetTemporal(bb, indexer++, value);
            }
            for (u32 i = 0; i < indexer; ++i) {
                SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
@@ -411,9 +410,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {

        for (u32 element = 0; element < 2; ++element) {
            auto params = coords;
-            MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())};
-            const Node value =
-                Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params));
+            MetaTexture meta{sampler, {}, {}, {}, element};
+            const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
            SetTemporal(bb, element, value);
        }
        for (u32 element = 0; element < 2; ++element) {
@@ -431,7 +429,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");

        if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
+            LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
        }

        WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
@@ -464,8 +462,7 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
    return *used_samplers.emplace(entry).first;
 }

-void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr,
-                                        const Node4& components) {
+void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
    u32 dest_elem = 0;
    for (u32 elem = 0; elem < 4; ++elem) {
        if (!instr.tex.IsComponentEnabled(elem)) {
@@ -480,7 +477,7 @@ void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr,
    }
 }

-void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr,
+void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
                                         const Node4& components) {
    // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
    // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
@@ -504,7 +501,7 @@ void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr,
    }
 }

-void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr,
+void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
                                             const Node4& components) {
    // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
    // float instruction).
@@ -535,15 +532,16 @@ void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr,
 }

 Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
-                               TextureProcessMode process_mode, bool depth_compare, bool is_array,
-                               std::size_t array_offset, std::size_t bias_offset,
-                               std::vector<Node>&& coords) {
-    UNIMPLEMENTED_IF_MSG(
-        (texture_type == TextureType::Texture3D && (is_array || depth_compare)) ||
-            (texture_type == TextureType::TextureCube && is_array && depth_compare),
-        "This method is not supported.");
+                               TextureProcessMode process_mode, std::vector<Node> coords,
+                               Node array, Node depth_compare, u32 bias_offset) {
+    const bool is_array = array != nullptr;
+    const bool is_shadow = depth_compare != nullptr;

-    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
+    UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
+                             (texture_type == TextureType::TextureCube && is_array && is_shadow),
+                         "This method is not supported.");
+
+    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);

    const bool lod_needed = process_mode == TextureProcessMode::LZ ||
                            process_mode == TextureProcessMode::LL ||
@@ -552,35 +550,30 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
    // LOD selection (either via bias or explicit textureLod) not supported in GL for
    // sampler2DArrayShadow and samplerCubeArrayShadow.
    const bool gl_lod_supported =
-        !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) ||
-          (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare));
+        !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
+          (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));

    const OperationCode read_method =
-        lod_needed && gl_lod_supported ? OperationCode::F4TextureLod : OperationCode::F4Texture;
+        lod_needed && gl_lod_supported ? OperationCode::TextureLod : OperationCode::Texture;

    UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);

-    std::optional<u32> array_offset_value;
-    if (is_array)
-        array_offset_value = static_cast<u32>(array_offset);
-
-    const auto coords_count = static_cast<u32>(coords.size());
-
+    std::vector<Node> extras;
    if (process_mode != TextureProcessMode::None && gl_lod_supported) {
        if (process_mode == TextureProcessMode::LZ) {
-            coords.push_back(Immediate(0.0f));
+            extras.push_back(Immediate(0.0f));
        } else {
            // If present, lod or bias are always stored in the register indexed by the gpr20
            // field with an offset depending on the usage of the other registers
-            coords.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
+            extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
        }
    }

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
-        auto params = coords;
-        MetaTexture meta{sampler, element, coords_count, array_offset_value};
-        values[element] = Operation(read_method, std::move(meta), std::move(params));
+        auto copy_coords = coords;
+        MetaTexture meta{sampler, array, depth_compare, extras, element};
+        values[element] = Operation(read_method, meta, std::move(copy_coords));
    }

    return values;
@@ -602,28 +595,22 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(coord_register + i));
    }
-    // 1D.DC in opengl the 2nd component is ignored.
+    // 1D.DC in OpenGL the 2nd component is ignored.
    if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
        coords.push_back(Immediate(0.0f));
    }
-    std::size_t array_offset{};
-    if (is_array) {
-        array_offset = coords.size();
-        coords.push_back(GetRegister(array_register));
-    }
+
+    const Node array = is_array ? GetRegister(array_register) : nullptr;
+
+    Node dc{};
    if (depth_compare) {
-        // Depth is always stored in the register signaled by gpr20
-        // or in the next register if lod or bias are used
+        // Depth is always stored in the register signaled by gpr20 or in the next register if lod
+        // or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
-        coords.push_back(GetRegister(depth_register));
-    }
-    // Fill ignored coordinates
-    while (coords.size() < total_coord_count) {
-        coords.push_back(Immediate(0));
+        dc = GetRegister(depth_register);
    }

-    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
-                          0, std::move(coords));
+    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
 }

 Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
@@ -641,6 +628,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
        (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;
+    const u32 bias_offset = coord_count > 2 ? 1 : 0;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
@@ -648,24 +636,17 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
    }

-    std::size_t array_offset{};
-    if (is_array) {
-        array_offset = coords.size();
-        coords.push_back(GetRegister(array_register));
-    }
+    const Node array = is_array ? GetRegister(array_register) : nullptr;
+
+    Node dc{};
    if (depth_compare) {
-        // Depth is always stored in the register signaled by gpr20
-        // or in the next register if lod or bias are used
+        // Depth is always stored in the register signaled by gpr20 or in the next register if lod
+        // or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
-        coords.push_back(GetRegister(depth_register));
-    }
-    // Fill ignored coordinates
-    while (coords.size() < total_coord_count) {
-        coords.push_back(Immediate(0));
+        dc = GetRegister(depth_register);
    }

-    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
-                          (coord_count > 2 ? 1 : 0), std::move(coords));
+    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
 }

 Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
@@ -680,24 +661,16 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
    const u64 coord_register = array_register + (is_array ? 1 : 0);

    std::vector<Node> coords;
-
-    for (size_t i = 0; i < coord_count; ++i) {
+    for (size_t i = 0; i < coord_count; ++i)
        coords.push_back(GetRegister(coord_register + i));
-    }
-    std::optional<u32> array_offset;
-    if (is_array) {
-        array_offset = static_cast<u32>(coords.size());
-        coords.push_back(GetRegister(array_register));
-    }
-
+    const Node array = is_array ? GetRegister(array_register) : nullptr;
    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
-        auto params = coords;
-        MetaTexture meta{sampler, element, static_cast<u32>(coords.size()), array_offset};
-        values[element] =
-            Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
+        auto coords_copy = coords;
+        MetaTexture meta{sampler, array, {}, {}, element};
+        values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
    }

    return values;
@@ -705,7 +678,6 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de

 Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
    const std::size_t type_coord_count = GetCoordCount(texture_type);
-    const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0);
    const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;

    // If enabled arrays index is always stored in the gpr8 field
@@ -719,33 +691,22 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
            : coord_register + 1;

    std::vector<Node> coords;
-
    for (std::size_t i = 0; i < type_coord_count; ++i) {
        const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
    }
-    std::optional<u32> array_offset;
-    if (is_array) {
-        array_offset = static_cast<u32>(coords.size());
-        coords.push_back(GetRegister(array_register));
-    }
-    const auto coords_count = static_cast<u32>(coords.size());

-    if (lod_enabled) {
-        // When lod is used always is in grp20
-        coords.push_back(GetRegister(instr.gpr20));
-    } else {
-        coords.push_back(Immediate(0));
-    }
+    const Node array = is_array ? GetRegister(array_register) : nullptr;
+    // When lod is used, it is always stored in gpr20
+    const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
-        auto params = coords;
-        MetaTexture meta{sampler, element, coords_count, array_offset};
-        values[element] =
-            Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params));
+        auto coords_copy = coords;
+        MetaTexture meta{sampler, array, {}, {lod}, element};
+        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
    }
    return values;
 }
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -14,7 +14,7 @@ using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;

-u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

--- a/src/video_core/shader/decode/predicate_set_predicate.cpp
+++ b/src/video_core/shader/decode/predicate_set_predicate.cpp
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Pred;

-u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ b/src/video_core/shader/decode/predicate_set_register.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ b/src/video_core/shader/decode/register_set_predicate.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -15,7 +15,7 @@ using Tegra::Shader::Pred;
 using Tegra::Shader::VideoType;
 using Tegra::Shader::VmadShr;

-u32 ShaderIR::DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;

-u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) {
+u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -337,27 +337,27 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {
    }
 }

-void ShaderIR::SetRegister(BasicBlock& bb, Register dest, Node src) {
+void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) {
    bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src));
 }

-void ShaderIR::SetPredicate(BasicBlock& bb, u64 dest, Node src) {
+void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) {
    bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src));
 }

-void ShaderIR::SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value) {
+void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) {
    bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value));
 }

-void ShaderIR::SetLocalMemory(BasicBlock& bb, Node address, Node value) {
+void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {
    bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value));
 }

-void ShaderIR::SetTemporal(BasicBlock& bb, u32 id, Node value) {
+void ShaderIR::SetTemporal(NodeBlock& bb, u32 id, Node value) {
    SetRegister(bb, Register::ZeroIndex + 1 + id, value);
 }

-void ShaderIR::SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc) {
+void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) {
    if (!sets_cc) {
        return;
    }
@@ -366,7 +366,7 @@ void ShaderIR::SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_c
    LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
 }

-void ShaderIR::SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc) {
+void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) {
    if (!sets_cc) {
        return;
    }
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -39,7 +39,7 @@ using NodeData =
                 PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>;
 using Node = const NodeData*;
 using Node4 = std::array<Node, 4>;
-using BasicBlock = std::vector<Node>;
+using NodeBlock = std::vector<Node>;

 constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;

@@ -156,12 +156,12 @@ enum class OperationCode {
    Logical2HNotEqual,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2

-    F4Texture,                /// (MetaTexture, float[N] coords, float[M] params) -> float4
-    F4TextureLod,             /// (MetaTexture, float[N] coords, float[M] params) -> float4
-    F4TextureGather,          /// (MetaTexture, float[N] coords, float[M] params) -> float4
-    F4TextureQueryDimensions, /// (MetaTexture, float a) -> float4
-    F4TextureQueryLod,        /// (MetaTexture, float[N] coords) -> float4
-    F4TexelFetch,             /// (MetaTexture, int[N], int) -> float4
+    Texture,                /// (MetaTexture, float[N] coords) -> float4
+    TextureLod,             /// (MetaTexture, float[N] coords) -> float4
+    TextureGather,          /// (MetaTexture, float[N] coords) -> float4
+    TextureQueryDimensions, /// (MetaTexture, float a) -> float4
+    TextureQueryLod,        /// (MetaTexture, float[N] coords) -> float4
+    TexelFetch,             /// (MetaTexture, int[N] coords) -> float4

    Branch,        /// (uint branch_target) -> void
    PushFlowStack, /// (uint branch_target) -> void
@@ -236,6 +236,11 @@ private:

 class ConstBuffer {
 public:
+    explicit ConstBuffer(u32 max_offset, bool is_indirect)
+        : max_offset{max_offset}, is_indirect{is_indirect} {}
+
+    ConstBuffer() = default;
+
    void MarkAsUsed(u64 offset) {
        max_offset = std::max(max_offset, static_cast<u32>(offset));
    }
@@ -252,6 +257,10 @@ public:
        return max_offset + sizeof(float);
    }

+    u32 GetMaxOffset() const {
+        return max_offset;
+    }
+
 private:
    u32 max_offset{};
    bool is_indirect{};
@@ -279,9 +288,10 @@ struct MetaHalfArithmetic {

 struct MetaTexture {
    const Sampler& sampler;
+    Node array{};             /// Optional array index node, may be null
+    Node depth_compare{};     /// Optional depth comparison reference node, may be null
+    std::vector<Node> extras; /// Extra operands, e.g. LOD, bias or gather component
    u32 element{};
-    u32 coords_count{};
-    std::optional<u32> array_index;
 };

 constexpr MetaArithmetic PRECISE = {true};
@@ -530,7 +540,7 @@ public:
        Decode();
    }

-    const std::map<u32, BasicBlock>& GetBasicBlocks() const {
+    const std::map<u32, NodeBlock>& GetBasicBlocks() const {
        return basic_blocks;
    }

@@ -581,7 +591,7 @@ private:

    ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels);

-    BasicBlock DecodeRange(u32 begin, u32 end);
+    NodeBlock DecodeRange(u32 begin, u32 end);

    /**
     * Decodes a single instruction from Tegra to IR.
@@ -589,33 +599,33 @@ private:
     * @param pc Program counter. Offset to decode.
     * @return Next address to decode.
     */
-    u32 DecodeInstr(BasicBlock& bb, u32 pc);
+    u32 DecodeInstr(NodeBlock& bb, u32 pc);

-    u32 DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc);
-    u32 DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc);
+    u32 DecodeArithmetic(NodeBlock& bb, u32 pc);
+    u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc);
+    u32 DecodeBfe(NodeBlock& bb, u32 pc);
+    u32 DecodeBfi(NodeBlock& bb, u32 pc);
+    u32 DecodeShift(NodeBlock& bb, u32 pc);
+    u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc);
+    u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc);
+    u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc);
+    u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc);
+    u32 DecodeFfma(NodeBlock& bb, u32 pc);
+    u32 DecodeHfma2(NodeBlock& bb, u32 pc);
+    u32 DecodeConversion(NodeBlock& bb, u32 pc);
+    u32 DecodeMemory(NodeBlock& bb, u32 pc);
+    u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
+    u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
+    u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
+    u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc);
+    u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc);
+    u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc);
+    u32 DecodeFloatSet(NodeBlock& bb, u32 pc);
+    u32 DecodeIntegerSet(NodeBlock& bb, u32 pc);
+    u32 DecodeHalfSet(NodeBlock& bb, u32 pc);
+    u32 DecodeVideo(NodeBlock& bb, u32 pc);
+    u32 DecodeXmad(NodeBlock& bb, u32 pc);
+    u32 DecodeOther(NodeBlock& bb, u32 pc);

    /// Internalizes node's data and returns a managed pointer to a clone of that node
    Node StoreNode(NodeData&& node_data);
@@ -664,20 +674,20 @@ private:
    Node GetTemporal(u32 id);

    /// Sets a register. src value must be a number-evaluated node.
-    void SetRegister(BasicBlock& bb, Tegra::Shader::Register dest, Node src);
+    void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src);
    /// Sets a predicate. src value must be a bool-evaluated node
-    void SetPredicate(BasicBlock& bb, u64 dest, Node src);
+    void SetPredicate(NodeBlock& bb, u64 dest, Node src);
    /// Sets an internal flag. src value must be a bool-evaluated node
-    void SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value);
+    void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
    /// Sets a local memory address. address and value must be a number-evaluated node
-    void SetLocalMemory(BasicBlock& bb, Node address, Node value);
+    void SetLocalMemory(NodeBlock& bb, Node address, Node value);
    /// Sets a temporal. Internally it uses a post-RZ register
-    void SetTemporal(BasicBlock& bb, u32 id, Node value);
+    void SetTemporal(NodeBlock& bb, u32 id, Node value);

    /// Sets internal flags from a float
-    void SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc = true);
+    void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true);
    /// Sets internal flags from an integer
-    void SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc = true);
+    void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true);

    /// Conditionally absolute/negated float. Absolute is applied first
    Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate);
@@ -718,12 +728,12 @@ private:
    /// Extracts a sequence of bits from a node
    Node BitfieldExtract(Node value, u32 offset, u32 bits);

-    void WriteTexInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr,
+    void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
                                  const Node4& components);

-    void WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr,
+    void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
                                   const Node4& components);
-    void WriteTexsInstructionHalfFloat(BasicBlock& bb, Tegra::Shader::Instruction instr,
+    void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
                                       const Node4& components);

    Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
@@ -745,23 +755,22 @@ private:
        bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);

    Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
-                         Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
-                         bool is_array, std::size_t array_offset, std::size_t bias_offset,
-                         std::vector<Node>&& coords);
+                         Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
+                         Node array, Node depth_compare, u32 bias_offset);

    Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
                         u64 byte_height);

-    void WriteLogicOperation(BasicBlock& bb, Tegra::Shader::Register dest,
+    void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest,
                             Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b,
                             Tegra::Shader::PredicateResultMode predicate_mode,
                             Tegra::Shader::Pred predicate, bool sets_cc);
-    void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
+    void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
                              Node op_c, Node imm_lut, bool sets_cc);

-    Node TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor);
+    Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);

-    std::pair<Node, s64> TrackRegister(const GprNode* tracked, const BasicBlock& code, s64 cursor);
+    std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);

    template <typename... T>
    Node Operation(OperationCode code, const T*... operands) {
@@ -803,7 +812,8 @@ private:
    u32 coverage_end{};
    std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;

-    std::map<u32, BasicBlock> basic_blocks;
+    std::map<u32, NodeBlock> basic_blocks;
+    NodeBlock global_code;

    std::vector<std::unique_ptr<NodeData>> stored_nodes;

--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -11,7 +11,7 @@
 namespace VideoCommon::Shader {

 namespace {
-std::pair<Node, s64> FindOperation(const BasicBlock& code, s64 cursor,
+std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
                                   OperationCode operation_code) {
    for (; cursor >= 0; --cursor) {
        const Node node = code[cursor];
@@ -19,12 +19,19 @@ std::pair<Node, s64> FindOperation(const BasicBlock& code, s64 cursor,
            if (operation->GetCode() == operation_code)
                return {node, cursor};
        }
+        if (const auto conditional = std::get_if<ConditionalNode>(node)) {
+            // Scan the nested block backwards, but report the position of the conditional itself
+            const auto& nested = conditional->GetCode();
+            const s64 last = static_cast<s64>(nested.size()) - 1;
+            if (const Node found = FindOperation(nested, last, operation_code).first)
+                return {found, cursor};
+        }
    }
    return {};
 }
 } // namespace

-Node ShaderIR::TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor) {
+Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
    if (const auto cbuf = std::get_if<CbufNode>(tracked)) {
        // Cbuf found, but it has to be immediate
        return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr;
@@ -50,10 +57,14 @@ Node ShaderIR::TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor) {
        }
        return nullptr;
    }
+    if (std::get_if<ConditionalNode>(tracked)) {
+        // A conditional block has no value to follow; re-tracking it would recurse forever
+        return nullptr;
+    }
    return nullptr;
 }

-std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const BasicBlock& code,
+std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
                                             s64 cursor) {
    for (; cursor >= 0; --cursor) {
        const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -182,7 +182,7 @@ struct TICEntry {
    };
    union {
        BitField<0, 16, u32> height_minus_1;
-        BitField<16, 15, u32> depth_minus_1;
+        BitField<16, 14, u32> depth_minus_1;
    };
    union {
        BitField<6, 13, u32> mip_lod_bias;
@@ -317,7 +317,6 @@ struct FullTextureInfo {
    u32 index;
    TICEntry tic;
    TSCEntry tsc;
-    bool enabled;
 };

 /// Returns the number of bytes per pixel of the input texture format.
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -11,8 +11,9 @@

 namespace VideoCore {

-std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window) {
-    return std::make_unique<OpenGL::RendererOpenGL>(emu_window);
+std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
+                                             Core::System& system) {
+    return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system);
 }

 u16 GetResolutionScaleFactor(const RendererBase& renderer) {
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -6,6 +6,10 @@

 #include <memory>

+namespace Core {
+class System;
+}
+
 namespace Core::Frontend {
 class EmuWindow;
 }
@@ -20,7 +24,8 @@ class RendererBase;
 * @note The returned renderer instance is simply allocated. Its Init()
 *       function still needs to be called to fully complete its setup.
 */
-std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window);
+std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
+                                             Core::System& system);

 u16 GetResolutionScaleFactor(const RendererBase& renderer);

--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -29,6 +29,15 @@ void EmuThread::run() {

    stop_run = false;

+    emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
+
+    Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources(
+        stop_run, [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) {
+            emit LoadProgress(stage, value, total);
+        });
+
+    emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
+
    // holds whether the cpu was running during the last iteration,
    // so that the DebugModeLeft signal can be emitted before the
    // next execution step
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -22,6 +22,10 @@ class GGLWidgetInternal;
 class GMainWindow;
 class GRenderWindow;

+namespace VideoCore {
+enum class LoadCallbackStage;
+}
+
 class EmuThread : public QThread {
    Q_OBJECT

@@ -75,7 +79,7 @@ public:
 private:
    bool exec_step = false;
    bool running = false;
-    std::atomic<bool> stop_run{false};
+    std::atomic_bool stop_run{false};
    std::mutex running_mutex;
    std::condition_variable running_cv;

@@ -101,6 +105,8 @@ signals:
    void DebugModeLeft();

    void ErrorThrown(Core::System::ResultStatus, std::string);
+
+    void LoadProgress(VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total);
 };

 class GRenderWindow : public QWidget, public Core::Frontend::EmuWindow {
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -370,6 +370,8 @@ void Config::ReadValues() {
    Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat();
    Settings::values.use_frame_limit = qt_config->value("use_frame_limit", true).toBool();
    Settings::values.frame_limit = qt_config->value("frame_limit", 100).toInt();
+    Settings::values.use_disk_shader_cache =
+        qt_config->value("use_disk_shader_cache", false).toBool();
    Settings::values.use_accurate_gpu_emulation =
        qt_config->value("use_accurate_gpu_emulation", false).toBool();

@@ -629,6 +631,7 @@ void Config::SaveValues() {
    qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor);
    qt_config->setValue("use_frame_limit", Settings::values.use_frame_limit);
    qt_config->setValue("frame_limit", Settings::values.frame_limit);
+    qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache);
    qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation);

    // Cast to double because Qt's written float values are not human-readable
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -62,9 +62,7 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent)
        const QColor new_bg_color = QColorDialog::getColor(bg_color);
        if (!new_bg_color.isValid())
            return;
-        bg_color = new_bg_color;
-        ui->bg_button->setStyleSheet(
-            QString("QPushButton { background-color: %1 }").arg(bg_color.name()));
+        UpdateBackgroundColorButton(new_bg_color);
    });
 }

@@ -75,11 +73,10 @@ void ConfigureGraphics::setConfiguration() {
        static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
    ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit);
    ui->frame_limit->setValue(Settings::values.frame_limit);
+    ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
    ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
-    bg_color = QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
-                                Settings::values.bg_blue);
-    ui->bg_button->setStyleSheet(
-        QString("QPushButton { background-color: %1 }").arg(bg_color.name()));
+    UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
+                                                 Settings::values.bg_blue));
 }

 void ConfigureGraphics::applyConfiguration() {
@@ -87,8 +84,19 @@ void ConfigureGraphics::applyConfiguration() {
        ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
    Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked();
    Settings::values.frame_limit = ui->frame_limit->value();
+    Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
    Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
    Settings::values.bg_red = static_cast<float>(bg_color.redF());
    Settings::values.bg_green = static_cast<float>(bg_color.greenF());
    Settings::values.bg_blue = static_cast<float>(bg_color.blueF());
 }
+
+void ConfigureGraphics::UpdateBackgroundColorButton(QColor color) {
+    bg_color = color;
+
+    QPixmap pixmap(ui->bg_button->size());
+    pixmap.fill(bg_color);
+
+    const QIcon color_icon(pixmap);
+    ui->bg_button->setIcon(color_icon);
+}
--- a/src/yuzu/configuration/configure_graphics.h
+++ b/src/yuzu/configuration/configure_graphics.h
@@ -23,6 +23,8 @@ public:
 private:
    void setConfiguration();

+    void UpdateBackgroundColorButton(QColor color);
+
    std::unique_ptr<Ui::ConfigureGraphics> ui;
    QColor bg_color;
 };
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -49,6 +49,13 @@
          </item>
         </layout>
        </item>
+        <item>
+         <widget class="QCheckBox" name="use_disk_shader_cache">
+          <property name="text">
+           <string>Use disk shader cache</string>
+          </property>
+         </widget>
+        </item>
        <item>
         <widget class="QCheckBox" name="use_accurate_gpu_emulation">
          <property name="text">
--- a/src/yuzu/loading_screen.cpp
+++ b/src/yuzu/loading_screen.cpp
@@ -43,6 +43,7 @@ QProgressBar {
 }
 QProgressBar::chunk {
  background-color: #0ab9e6;
+  width: 1px;
 })";

 constexpr const char PROGRESSBAR_STYLE_BUILD[] = R"(
@@ -53,7 +54,8 @@ QProgressBar {
  padding: 2px;
 }
 QProgressBar::chunk {
- background-color: #ff3c28;
+  background-color: #ff3c28;
+  width: 1px;
 })";

 constexpr const char PROGRESSBAR_STYLE_COMPLETE[] = R"(
--- a/src/yuzu/loading_screen.ui
+++ b/src/yuzu/loading_screen.ui
@@ -132,7 +132,7 @@ border-radius: 15px;
 font: 75 15pt &quot;Arial&quot;;</string>
          </property>
          <property name="text">
-           <string>Stage 1 of 2. Estimate Time 5m 4s</string>
+           <string>Estimated Time 5m 4s</string>
          </property>
         </widget>
        </item>
@@ -146,6 +146,9 @@ font: 75 15pt &quot;Arial&quot;;</string>
        <property name="text">
         <string/>
        </property>
+        <property name="alignment">
+         <set>Qt::AlignCenter</set>
+        </property>
        <property name="margin">
         <number>30</number>
        </property>
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -887,6 +887,9 @@ void GMainWindow::BootGame(const QString& filename) {
    connect(emu_thread.get(), &EmuThread::DebugModeLeft, waitTreeWidget,
            &WaitTreeWidget::OnDebugModeLeft, Qt::BlockingQueuedConnection);

+    connect(emu_thread.get(), &EmuThread::LoadProgress, loading_screen,
+            &LoadingScreen::OnLoadProgress, Qt::QueuedConnection);
+
    // Update the GUI
    if (ui.action_Single_Window_Mode->isChecked()) {
        game_list->hide();
@@ -1682,12 +1685,16 @@ void GMainWindow::OnToggleFilterBar() {

 void GMainWindow::OnCaptureScreenshot() {
    OnPauseGame();
-    const QString path =
-        QFileDialog::getSaveFileName(this, tr("Capture Screenshot"),
-                                     UISettings::values.screenshot_path, tr("PNG Image (*.png)"));
-    if (!path.isEmpty()) {
-        UISettings::values.screenshot_path = QFileInfo(path).path();
-        render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor, path);
+    QFileDialog png_dialog(this, tr("Capture Screenshot"), UISettings::values.screenshot_path,
+                           tr("PNG Image (*.png)"));
+    png_dialog.setAcceptMode(QFileDialog::AcceptSave);
+    png_dialog.setDefaultSuffix("png");
+    if (png_dialog.exec()) {
+        const QString path = png_dialog.selectedFiles().first();
+        if (!path.isEmpty()) {
+            UISettings::values.screenshot_path = QFileInfo(path).path();
+            render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor, path);
+        }
    }
    OnStartGame();
 }
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -350,6 +350,8 @@ void Config::ReadValues() {
    Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true);
    Settings::values.frame_limit =
        static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
+    Settings::values.use_disk_shader_cache =
+        sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
    Settings::values.use_accurate_gpu_emulation =
        sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);

--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -110,6 +110,10 @@ use_frame_limit =
 # 1 - 9999: Speed limit as a percentage of target game speed. 100 (default)
 frame_limit =

+# Whether to use disk based shader cache
+# 0 (default): Off, 1 : On
+use_disk_shader_cache =
+
 # Whether to use accurate GPU emulation
 # 0 (default): Off (fast), 1 : On (slow)
 use_accurate_gpu_emulation =
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
ReinUsesLisp	e60d4d70bc	gl_shader_decompiler: Re-implement TLDS lod	2019-02-12 17:03:07 -03:00
bunnei	444231a83d	Merge pull request #2108 from FernandoS27/fix-cc Fix incorrect value for CC bit in IADD	2019-02-12 10:39:03 -05:00
bunnei	c1accfefde	Merge pull request #2109 from FernandoS27/fix-f2i Corrected F2I None mode to RoundEven.	2019-02-12 10:20:29 -05:00
bunnei	27e5efd265	Merge pull request #2068 from ReinUsesLisp/shader-cleanup-textures shader_ir: Clean texture management code	2019-02-12 10:20:15 -05:00
Fernando Sahmkow	f5ec165e8c	Corrected F2I None mode to RoundEven.	2019-02-11 18:46:45 -04:00
Fernando Sahmkow	edd668047c	Fix incorrect value for CC bit in IADD	2019-02-11 16:44:43 -04:00
bunnei	1d98027a0e	Merge pull request #1904 from bunnei/better-fermi-copy gl_rasterizer: Implement a more accurate fermi 2D copy.	2019-02-08 23:32:24 -05:00
bunnei	2374471a1e	Merge pull request #2096 from FearlessTobi/patch-3 nvdisp_disp0: change drawing message log level from Warning to Trace	2019-02-08 21:56:47 -05:00
Fernando Sahmkow	e543320129	Implement linear textures (#2089)	2019-02-08 18:28:01 -05:00
bunnei	504aafedd2	Merge pull request #2097 from ReinUsesLisp/fixup-texview gl_rasterizer_cache: Fixup texture view parameters	2019-02-08 17:30:36 -05:00
ReinUsesLisp	e36e7ae74e	gl_rasterizer_cache: Fixup texture view parameters These parameters were declared as constants and passed to glTextureView but then they were removed on a rebase. This addresses that mistake.	2019-02-08 18:32:58 -03:00
Tobias	259e52ccb2	nvdisp_disp0: change drawing message log level from Warning to Trace This is a leftover from the early yuzu days. We shouldn't log every time when we are drawing by default, so let's change the log level to Trace.	2019-02-08 19:26:49 +01:00
ReinUsesLisp	889c646ac0	shader_ir: Remove F4 prefix to texture operations This was originally included because texture operations returned a vec4. These operations now return a single float and the F4 prefix doesn't mean anything.	2019-02-07 17:36:46 -03:00
ReinUsesLisp	d62b0a9e29	shader_ir: Clean texture management code Previous code relied on GLSL parameter order (something that's always ill-formed on an IR design). This approach passes spatial coordinates through operation nodes and array and depth compare values in the texture metadata. It still contains an "extra" vector containing generic nodes for bias and component index (for example) which is still a bit ill-formed but it should be better than the previous approach.	2019-02-07 00:46:13 -03:00
bunnei	f09d1dffd1	Merge pull request #2083 from ReinUsesLisp/shader-ir-cbuf-tracking shader/track: Add a more permissive global memory tracking	2019-02-06 21:56:14 -05:00
bunnei	35e1118766	gl_rasterizer_cache: Mark surface copy destinations as modified.	2019-02-06 21:54:25 -05:00
bunnei	dd1aab5446	gl_rasterizer: Implement a more accurate fermi 2D copy. - This is a blit, use the blit registers.	2019-02-06 21:54:21 -05:00
bunnei	ca482997fe	Merge pull request #2091 from FearlessTobi/port-4603 Port citra-emu/citra#4603: "gdbstub: only let Execute breakpoints write/restore BKPT opcodes into target memory"	2019-02-06 21:51:46 -05:00
bunnei	e09f1c92fb	Merge pull request #2021 from ReinUsesLisp/disk-cache gl_shader_cache: Disk based shader cache	2019-02-06 21:47:20 -05:00
ReinUsesLisp	dfd14618f7	cmake: Fix title bar issue	2019-02-06 22:23:41 -03:00
Frederic L	d0ac624403	gl_shader_disk_cache: Check LZ4 size limit Co-Authored-By: ReinUsesLisp <reinuseslisp@airmail.cc>	2019-02-06 22:23:41 -03:00
Frederic L	9f0b247cf6	gl_shader_disk_cache: Consider compressed size zero as an error Co-Authored-By: ReinUsesLisp <reinuseslisp@airmail.cc>	2019-02-06 22:23:41 -03:00
Frederic L	8ff2ce5207	cmake: Use CMAKE_COMMAND instead of "cmake" Co-Authored-By: ReinUsesLisp <reinuseslisp@airmail.cc>	2019-02-06 22:23:41 -03:00
ReinUsesLisp	e6a2245304	gl_shader_disk_cache: Use unordered containers	2019-02-06 22:23:41 -03:00
ReinUsesLisp	e147ed4fc0	gl_shader_cache: Fixup GLSL unique identifiers	2019-02-06 22:23:40 -03:00
Michael	4ffb487251	cmake: Fixup application string Co-Authored-By: ReinUsesLisp <reinuseslisp@airmail.cc>	2019-02-06 22:23:40 -03:00
ReinUsesLisp	bd928e70ed	loading_screen: Unchunk progress bar	2019-02-06 22:23:40 -03:00
ReinUsesLisp	eb73247433	gl_shader_cache: Link loading screen with disk shader cache load	2019-02-06 22:23:40 -03:00
ReinUsesLisp	df0f31f44e	gl_shader_cache: Set GL_PROGRAM_SEPARABLE to dumped shaders i965 (and probably all Mesa drivers) require GL_PROGRAM_SEPARABLE when using glProgramBinary. This is probably required by the standard but it's ignored by permissive proprietary drivers.	2019-02-06 22:23:40 -03:00
ReinUsesLisp	7fefec585c	gl_shader_disk_cache: Pass core system as argument and guard against games without title ids	2019-02-06 22:23:40 -03:00
ReinUsesLisp	2bc6a699dc	gl_shader_disk_cache: Guard reads and writes against failure	2019-02-06 22:23:40 -03:00
ReinUsesLisp	750abcc23d	gl_shader_disk_cache: Address miscellaneous feedback	2019-02-06 22:23:40 -03:00
ReinUsesLisp	8ee3666a3c	gl_shader_disk_cache: Pass return values returning instead of by parameters	2019-02-06 22:23:40 -03:00
ReinUsesLisp	ed956569a4	gl_shader_disk_cache: Compress program binaries using LZ4	2019-02-06 22:23:39 -03:00
ReinUsesLisp	f087639e4a	gl_shader_disk_cache: Compress GLSL code using LZ4	2019-02-06 22:23:39 -03:00
ReinUsesLisp	cfb20c4c9d	gl_shader_disk_cache: Save GLSL and entries into the precompiled file	2019-02-06 22:23:39 -03:00
ReinUsesLisp	e78da8dc1f	settings: Hide shader cache behind a setting	2019-02-06 22:20:57 -03:00
ReinUsesLisp	be4641c43f	gl_shader_disk_cache: Invalidate shader cache changes with CMake hash	2019-02-06 22:20:57 -03:00
ReinUsesLisp	a3703f5767	gl_shader_cache: Refactor to support disk shader cache	2019-02-06 22:20:57 -03:00
ReinUsesLisp	4039086226	gl_shader_disk_cache: Add transferable cache invalidation	2019-02-06 22:20:57 -03:00
ReinUsesLisp	a1faed9950	gl_shader_disk_cache: Add precompiled load	2019-02-06 22:20:57 -03:00
ReinUsesLisp	57fb15d2a3	gl_shader_disk_cache: Add precompiled save	2019-02-06 22:20:57 -03:00
ReinUsesLisp	3435cd8d5e	gl_shader_disk_cache: Add transferable load	2019-02-06 22:20:57 -03:00
ReinUsesLisp	b1efceec89	gl_shader_disk_cache: Add transferable stores	2019-02-06 22:20:57 -03:00
ReinUsesLisp	98be5a4928	gl_shader_disk_cache: Add ShaderDiskCacheOpenGL class and helpers	2019-02-06 22:20:57 -03:00
ReinUsesLisp	145c3ac89e	gl_shader_disk_cache: Add file and move BaseBindings declaration	2019-02-06 22:20:57 -03:00
ReinUsesLisp	c2c5260fd7	gl_shader_decompiler: Remove name entries	2019-02-06 22:20:57 -03:00
ReinUsesLisp	8b11368671	gl_shader_util: Add parameter to handle retrievable programs	2019-02-06 22:20:57 -03:00
ReinUsesLisp	0ed5d728ca	rasterizer_interface: Add disk cache entry for the rasterizer	2019-02-06 22:20:57 -03:00
ReinUsesLisp	84412591c9	file_util: Add shader directory	2019-02-06 22:20:57 -03:00
ReinUsesLisp	049050856f	shader_decode: Implement LDG and basic cbuf tracking	2019-02-06 22:20:57 -03:00
bunnei	10ab714fe0	Merge pull request #2042 from ReinUsesLisp/nouveau-tex maxwell_3d: Allow texture handles with TIC id zero	2019-02-06 20:19:20 -05:00
bunnei	40ac058557	Merge pull request #2071 from ReinUsesLisp/dsa-texture gl_rasterizer: Use DSA for textures and move swizzling to texture state	2019-02-06 20:17:59 -05:00
Dimitri ALBORA	8b800369ea	gdbstub: only let Execute breakpoints write/restore BKPT opcodes into target memory	2019-02-06 19:07:35 +01:00
bunnei	c357d8f6f7	Merge pull request #2057 from FearlessTobi/port-4586 Port citra-emu/citra#4586: "Use QPixmap/QIcon for background color selection button"	2019-02-06 12:37:57 -05:00
bunnei	b34ae2235d	Merge pull request #2086 from FearlessTobi/port-4583 Port citra-emu/citra#4583: "citra_qt: Fix saving screenshot when no file extension is provided"	2019-02-06 12:33:35 -05:00
bunnei	40cd299f01	Merge pull request #2087 from lioncash/const service/nvflinger, service/vi: Improve error case handling	2019-02-06 12:33:13 -05:00
bunnei	67c1f31251	Merge pull request #2088 from jroweboy/h QT: Fix the loading screen 'H' switch logo to not glitch out	2019-02-05 21:06:39 -05:00
James Rowe	c82b0afb69	QT: Fix the loading screen 'H' switch logo to not glitch out	2019-02-05 18:24:15 -07:00
Lioncash	ef073ff117	service/nvflinger,service/vi: Handle failure cases with exposed API Converts many of the Find* functions to return a std::optional<T> as opposed to returning the raw return values directly. This allows removing a few assertions and handles error cases like the service itself does.	2019-02-05 18:03:28 -05:00
bunnei	7aa7d8f4ff	Merge pull request #2085 from ReinUsesLisp/cube-minus-one video_core/texture: Fix BitField size for depth_minus_one	2019-02-05 17:15:26 -05:00
xperia64	f598490b57	Fix crash when no files are selected	2019-02-05 22:40:23 +01:00
xperia64	284536a626	Add file extension to screenshot filename if not provided	2019-02-05 22:31:37 +01:00
Lioncash	7320c667df	service/nvflinger: Mark FindVsyncEvent() as a const member function This member function doesn't actually modify instance state, so it can be marked as a const member function.	2019-02-05 15:57:29 -05:00
Lioncash	3c02cdcc57	service/nvflinger: Rename GetVsyncEvent() to FindVsyncEvent() This was missed within #2075. Renames the member function to make it consistent with the rest of the Find* functions.	2019-02-05 15:55:18 -05:00
ReinUsesLisp	b5e685b297	video_core/texture: Fix BitField size for depth_minus_one	2019-02-05 04:32:06 -03:00
ReinUsesLisp	0d1d755086	shader/track: Search inside of conditional nodes Some games conditionally use global memory instructions. This allows the heuristic to search inside conditional nodes for the source constant buffer.	2019-02-03 17:21:20 -03:00
ReinUsesLisp	42b75e8be8	shader_ir: Rename BasicBlock to NodeBlock It's not always used as a basic block. Rename it for consistency.	2019-02-03 17:21:20 -03:00
ReinUsesLisp	6a6fabea58	shader_ir: Pass decoded nodes as a whole instead of per basic blocks Some games call LDG at the top of a basic block, causing the tracking heuristic to fail. This commit lets the heuristic see the decoded nodes as a whole instead of per basic blocks. This may lead to some false positives but allows the heuristic to track cases it previously couldn't.	2019-02-03 17:21:20 -03:00
ReinUsesLisp	2bdbb90af7	video_core: Assert on invalid GPU to CPU address queries	2019-02-03 04:58:40 -03:00
ReinUsesLisp	04e68e9738	maxwell_3d: Allow sampler handles with TSC id zero	2019-02-03 04:58:40 -03:00
ReinUsesLisp	390721a561	maxwell_3d: Allow texture handles with TIC id zero Also remove "enabled" field from Tegra::Texture::FullTextureInfo because it would become unused.	2019-02-03 04:58:24 -03:00
ReinUsesLisp	e01a9de35f	memory_manager: Check for reserved page status	2019-02-03 04:58:24 -03:00
ReinUsesLisp	3e80b08944	gl_rasterizer_cache: Fixup test clause	2019-01-30 19:10:35 -03:00
Mat M	911587fb8d	gl_rasterizer_cache: Guard clause swizzle testing Co-Authored-By: ReinUsesLisp <reinuseslisp@airmail.cc>	2019-01-30 19:10:35 -03:00
ReinUsesLisp	220df45b7d	gl_state: Remove texture target tracking	2019-01-30 19:10:35 -03:00
ReinUsesLisp	704744bb72	gl_rasterizer_cache: Move swizzling to textures instead of state	2019-01-30 19:10:35 -03:00
ReinUsesLisp	3bbaa98c78	gl_state: Use DSA and multi bind to update texture bindings	2019-01-30 19:10:11 -03:00
ReinUsesLisp	4b676e7786	gl_rasterizer: Use DSA for textures	2019-01-30 19:10:11 -03:00
xperia64	32eb080e02	Use QPixmap/QIcon for background color selection button	2019-01-26 15:08:54 +01:00