vk_rasterizer: Fix dynamic StencilOp updating when two faces are enabled

This function was incorrectly using the stencil_two_side_enable register when dynamically updating the StencilOp.
Merge pull request #6846 from ameerj/nvdec-gpu-decode
2021-09-12 16:19:12 -04:00 · 2021-09-11 23:11:32 +02:00 · 2021-09-11 22:36:22 +02:00 · 2021-09-11 22:36:03 +02:00 · 2021-09-11 22:35:52 +02:00 · 2021-09-11 22:35:25 +02:00
127 changed files with 2974 additions and 2187 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -376,7 +376,7 @@ if (ENABLE_SDL2)
    if (YUZU_USE_BUNDLED_SDL2)
        # Detect toolchain and platform
        if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64)
-            set(SDL2_VER "SDL2-2.0.15-prerelease")
+            set(SDL2_VER "SDL2-2.0.16")
        else()
            message(FATAL_ERROR "No bundled SDL2 binaries for your toolchain. Disable YUZU_USE_BUNDLED_SDL2 and provide your own.")
        endif()
@@ -396,7 +396,7 @@ if (ENABLE_SDL2)
    elseif (YUZU_USE_EXTERNAL_SDL2)
        message(STATUS "Using SDL2 from externals.")
    else()
-        find_package(SDL2 2.0.15 REQUIRED)
+        find_package(SDL2 2.0.16 REQUIRED)

        # Some installations don't set SDL2_LIBRARIES
        if("${SDL2_LIBRARIES}" STREQUAL "")
@@ -518,6 +518,10 @@ set(FFmpeg_COMPONENTS
    avutil
    swscale)

+if (CMAKE_SYSTEM_NAME STREQUAL "Linux") # VA-API (libva) is only probed on Linux
+    find_package(PkgConfig) # defines pkg_check_modules(); stays optional, as it was before
+    pkg_check_modules(LIBVA libva)
+endif()
 if (NOT YUZU_USE_BUNDLED_FFMPEG)
    # Use system installed FFmpeg
    find_package(FFmpeg QUIET COMPONENTS ${FFmpeg_COMPONENTS})
@@ -540,6 +544,9 @@ endif()

 if (YUZU_USE_BUNDLED_FFMPEG)
    if (NOT WIN32)
+        # TODO(lat9nq): Move this to externals/ffmpeg/CMakeLists.txt (and move externals/ffmpeg to
+        # externals/ffmpeg/ffmpeg)
+
        # Build FFmpeg from externals
        message(STATUS "Using FFmpeg from externals")

@@ -579,20 +586,23 @@ if (YUZU_USE_BUNDLED_FFMPEG)
                CACHE PATH "Paths to FFmpeg libraries" FORCE)
        endforeach()

-        set(FFmpeg_INCLUDE_DIR
-            "${FFmpeg_PREFIX};${FFmpeg_BUILD_DIR}"
-            CACHE PATH "Path to FFmpeg headers" FORCE)
+        Include(FindPkgConfig REQUIRED)
+        pkg_check_modules(LIBVA libva)
+        pkg_check_modules(CUDA cuda)
+        pkg_check_modules(FFNVCODEC ffnvcodec)
+        pkg_check_modules(VDPAU vdpau)
+
+        set(FFmpeg_HWACCEL_LIBRARIES)
+        set(FFmpeg_HWACCEL_FLAGS)
+        set(FFmpeg_HWACCEL_INCLUDE_DIRS)
+        set(FFmpeg_HWACCEL_LDFLAGS)

-        if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
-            Include(FindPkgConfig REQUIRED)
-            pkg_check_modules(LIBVA libva)
-        endif()
        if(LIBVA_FOUND)
            pkg_check_modules(LIBDRM libdrm REQUIRED)
            find_package(X11 REQUIRED)
            pkg_check_modules(LIBVA-DRM libva-drm REQUIRED)
            pkg_check_modules(LIBVA-X11 libva-x11 REQUIRED)
-            set(FFmpeg_LIBVA_LIBRARIES
+            list(APPEND FFmpeg_HWACCEL_LIBRARIES
                ${LIBDRM_LIBRARIES}
                ${X11_LIBRARIES}
                ${LIBVA-DRM_LIBRARIES}
@@ -602,11 +612,56 @@ if (YUZU_USE_BUNDLED_FFMPEG)
                --enable-hwaccel=h264_vaapi
                --enable-hwaccel=vp9_vaapi
                --enable-libdrm)
+            list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS
+                ${LIBDRM_INCLUDE_DIRS}
+                ${X11_INCLUDE_DIRS}
+                ${LIBVA-DRM_INCLUDE_DIRS}
+                ${LIBVA-X11_INCLUDE_DIRS}
+                ${LIBVA_INCLUDE_DIRS}
+            )
            message(STATUS "VA-API found")
        else()
            set(FFmpeg_HWACCEL_FLAGS --disable-vaapi)
        endif()

+        if (FFNVCODEC_FOUND AND CUDA_FOUND)
+            list(APPEND FFmpeg_HWACCEL_FLAGS
+                --enable-cuvid
+                --enable-ffnvcodec
+                --enable-nvdec
+                --enable-hwaccel=h264_nvdec
+                --enable-hwaccel=vp9_nvdec
+                --extra-cflags=-I${CUDA_INCLUDE_DIRS}
+            )
+            list(APPEND FFmpeg_HWACCEL_LIBRARIES
+                ${FFNVCODEC_LIBRARIES}
+                ${CUDA_LIBRARIES}
+            )
+            list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS
+                ${FFNVCODEC_INCLUDE_DIRS}
+                ${CUDA_INCLUDE_DIRS}
+            )
+            list(APPEND FFmpeg_HWACCEL_LDFLAGS
+                ${FFNVCODEC_LDFLAGS}
+                ${CUDA_LDFLAGS}
+            )
+            message(STATUS "ffnvcodec libraries version ${FFNVCODEC_VERSION} found")
+        endif()
+
+        if (VDPAU_FOUND)
+            list(APPEND FFmpeg_HWACCEL_FLAGS
+                --enable-vdpau
+                --enable-hwaccel=h264_vdpau
+                --enable-hwaccel=vp9_vdpau
+            )
+            list(APPEND FFmpeg_HWACCEL_LIBRARIES ${VDPAU_LIBRARIES})
+            list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS ${VDPAU_INCLUDE_DIRS})
+            list(APPEND FFmpeg_HWACCEL_LDFLAGS ${VDPAU_LDFLAGS})
+            message(STATUS "vdpau libraries version ${VDPAU_VERSION} found")
+        else()
+            list(APPEND FFmpeg_HWACCEL_FLAGS --disable-vdpau)
+        endif()
+
        # `configure` parameters builds only exactly what yuzu needs from FFmpeg
        # `--disable-vdpau` is needed to avoid linking issues
        add_custom_command(
@@ -624,7 +679,6 @@ if (YUZU_USE_BUNDLED_FFMPEG)
                    --disable-network
                    --disable-postproc
                    --disable-swresample
-                    --disable-vdpau
                    --enable-decoder=h264
                    --enable-decoder=vp9
                    --cc="${CMAKE_C_COMPILER}"
@@ -653,15 +707,26 @@ if (YUZU_USE_BUNDLED_FFMPEG)
                ${FFmpeg_BUILD_DIR}
        )

+        set(FFmpeg_INCLUDE_DIR
+            "${FFmpeg_PREFIX};${FFmpeg_BUILD_DIR};${FFmpeg_HWACCEL_INCLUDE_DIRS}"
+            CACHE PATH "Path to FFmpeg headers" FORCE)
+
+        set(FFmpeg_LDFLAGS
+            "${FFmpeg_HWACCEL_LDFLAGS}"
+            CACHE STRING "FFmpeg linker flags" FORCE)
+
        # ALL makes this custom target build every time
        # but it won't actually build if the DEPENDS parameter is up to date
        add_custom_target(ffmpeg-configure ALL DEPENDS ${FFmpeg_MAKEFILE})
        add_custom_target(ffmpeg-build ALL DEPENDS ${FFmpeg_BUILD_LIBRARIES} ffmpeg-configure)
        link_libraries(${FFmpeg_LIBVA_LIBRARIES})
-        set(FFmpeg_LIBRARIES ${FFmpeg_LIBVA_LIBRARIES} ${FFmpeg_BUILD_LIBRARIES}
+        set(FFmpeg_LIBRARIES ${FFmpeg_BUILD_LIBRARIES} ${FFmpeg_HWACCEL_LIBRARIES}
            CACHE PATH "Paths to FFmpeg libraries" FORCE)
        unset(FFmpeg_BUILD_LIBRARIES)
-        unset(FFmpeg_LIBVA_LIBRARIES)
+        unset(FFmpeg_HWACCEL_FLAGS)
+        unset(FFmpeg_HWACCEL_INCLUDE_DIRS)
+        unset(FFmpeg_HWACCEL_LDFLAGS)
+        unset(FFmpeg_HWACCEL_LIBRARIES)

        if (FFmpeg_FOUND)
            message(STATUS "Found FFmpeg version ${FFmpeg_VERSION}")
@@ -670,12 +735,13 @@ if (YUZU_USE_BUNDLED_FFMPEG)
        endif()
    else() # WIN32
        # Use yuzu FFmpeg binaries
-        set(FFmpeg_EXT_NAME "ffmpeg-4.3.1")
+        set(FFmpeg_EXT_NAME "ffmpeg-4.4")
        set(FFmpeg_PATH "${CMAKE_BINARY_DIR}/externals/${FFmpeg_EXT_NAME}")
        download_bundled_external("ffmpeg/" ${FFmpeg_EXT_NAME} "")
        set(FFmpeg_FOUND YES)
        set(FFmpeg_INCLUDE_DIR "${FFmpeg_PATH}/include" CACHE PATH "Path to FFmpeg headers" FORCE)
        set(FFmpeg_LIBRARY_DIR "${FFmpeg_PATH}/bin" CACHE PATH "Path to FFmpeg library directory" FORCE)
+        set(FFmpeg_LDFLAGS "" CACHE STRING "FFmpeg linker flags" FORCE)
        set(FFmpeg_DLL_DIR "${FFmpeg_PATH}/bin" CACHE PATH "Path to FFmpeg dll's" FORCE)
        set(FFmpeg_LIBRARIES
            ${FFmpeg_LIBRARY_DIR}/swscale.lib
@@ -701,7 +767,7 @@ if (APPLE)
 elseif (WIN32)
    # WSAPoll and SHGetKnownFolderPath (AppData/Roaming) didn't exist before WinNT 6.x (Vista)
    add_definitions(-D_WIN32_WINNT=0x0600 -DWINVER=0x0600)
-    set(PLATFORM_LIBRARIES winmm ws2_32)
+    set(PLATFORM_LIBRARIES winmm ws2_32 iphlpapi)
    if (MINGW)
        # PSAPI is the Process Status API
        set(PLATFORM_LIBRARIES ${PLATFORM_LIBRARIES} psapi imm32 version)
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -7,7 +7,9 @@ include(DownloadExternals)
 # xbyak
 if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
    add_library(xbyak INTERFACE)
-    target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
+    file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/xbyak/include)
+    file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/xbyak/xbyak DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/xbyak/include)
+    target_include_directories(xbyak SYSTEM INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/xbyak/include)
    target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
 endif()

@@ -19,6 +21,7 @@ target_include_directories(catch-single-include INTERFACE catch/single_include)
 if (ARCHITECTURE_x86_64)
    set(DYNARMIC_TESTS OFF)
    set(DYNARMIC_NO_BUNDLED_FMT ON)
+    set(DYNARMIC_IGNORE_ASSERTS ON CACHE BOOL "" FORCE)
    add_subdirectory(dynarmic)
 endif()

--- a/externals/SDL
+++ b/externals/SDL
--- a/externals/dynarmic
+++ b/externals/dynarmic
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -2,13 +2,10 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include <algorithm>
 #include <atomic>
 #include <chrono>
 #include <climits>
-#include <condition_variable>
-#include <memory>
-#include <mutex>
+#include <exception>
 #include <thread>
 #include <vector>

@@ -16,28 +13,174 @@
 #include <windows.h> // For OutputDebugStringW
 #endif

-#include "common/assert.h"
 #include "common/fs/file.h"
 #include "common/fs/fs.h"
+#include "common/fs/fs_paths.h"
+#include "common/fs/path_util.h"
 #include "common/literals.h"
+#include "common/thread.h"

 #include "common/logging/backend.h"
 #include "common/logging/log.h"
 #include "common/logging/text_formatter.h"
 #include "common/settings.h"
+#ifdef _WIN32
 #include "common/string_util.h"
+#endif
 #include "common/threadsafe_queue.h"

 namespace Common::Log {

+namespace {
+
+/**
+ * Interface for logging backends.
+ */
+class Backend {
+public:
+    virtual ~Backend() = default;
+
+    virtual void Write(const Entry& entry) = 0;
+
+    virtual void EnableForStacktrace() = 0;
+
+    virtual void Flush() = 0;
+};
+
+/**
+ * Backend that writes to stderr and with color
+ */
+class ColorConsoleBackend final : public Backend {
+public:
+    explicit ColorConsoleBackend() = default;
+
+    ~ColorConsoleBackend() override = default;
+
+    void Write(const Entry& entry) override {
+        if (enabled.load(std::memory_order_relaxed)) {
+            PrintColoredMessage(entry);
+        }
+    }
+
+    void Flush() override {
+        // stderr shouldn't be buffered
+    }
+
+    void EnableForStacktrace() override {
+        enabled = true;
+    }
+
+    void SetEnabled(bool enabled_) {
+        enabled = enabled_;
+    }
+
+private:
+    std::atomic_bool enabled{false};
+};
+
+/**
+ * Backend that writes to the file passed into the constructor; the prior log becomes *.old.txt
+ */
+class FileBackend final : public Backend {
+public:
+    explicit FileBackend(const std::filesystem::path& filename) {
+        auto old_filename = filename;
+        old_filename += ".old.txt";
+
+        // Rotate the previous log to *.old.txt. Existence checks are done within the functions
+        // themselves, and we don't particularly care if these succeed or not.
+        static_cast<void>(FS::RemoveFile(old_filename));
+        static_cast<void>(FS::RenameFile(filename, old_filename));
+
+        file = std::make_unique<FS::IOFile>(filename, FS::FileAccessMode::Write,
+                                            FS::FileType::TextFile);
+    }
+
+    ~FileBackend() override = default;
+
+    void Write(const Entry& entry) override {
+        if (!enabled) {
+            return;
+        }
+
+        bytes_written += file->WriteString(FormatLogMessage(entry).append(1, '\n'));
+
+        using namespace Common::Literals;
+        // Cap the log size in case entries are spammed; extended logging raises the cap to 1 GiB.
+        const auto write_limit = Settings::values.extended_logging ? 1_GiB : 100_MiB;
+        const bool write_limit_exceeded = bytes_written > write_limit;
+        if (entry.log_level >= Level::Error || write_limit_exceeded) {
+            if (write_limit_exceeded) {
+                // Stop writing once the limit is exceeded, but keep the file open so that
+                // EnableForStacktrace() can resume output if a stacktrace must be printed.
+                enabled = false;
+            }
+            file->Flush();
+        }
+    }
+
+    void Flush() override {
+        file->Flush();
+    }
+
+    void EnableForStacktrace() override {
+        enabled = true;
+        bytes_written = 0; // Restart the size budget so the re-enabled backend can write again
+    }
+
+private:
+    std::unique_ptr<FS::IOFile> file;
+    bool enabled = true; // Cleared when the size limit is hit; set again by EnableForStacktrace()
+    std::size_t bytes_written = 0; // Sum of WriteString() results since the last reset
+};
+
+/**
+ * Backend that writes to Visual Studio's output window
+ */
+class DebuggerBackend final : public Backend {
+public:
+    explicit DebuggerBackend() = default;
+
+    ~DebuggerBackend() override = default;
+
+    void Write(const Entry& entry) override {
+#ifdef _WIN32
+        ::OutputDebugStringW(UTF8ToUTF16W(FormatLogMessage(entry).append(1, '\n')).c_str());
+#endif
+    }
+
+    void Flush() override {}
+
+    void EnableForStacktrace() override {}
+};
+
+bool initialization_in_progress_suppress_logging = true;
+
 /**
 * Static state as a singleton.
 */
 class Impl {
 public:
    static Impl& Instance() {
-        static Impl backend;
-        return backend;
+        if (!instance) {
+            throw std::runtime_error("Using Logging instance before its initialization");
+        }
+        return *instance;
+    }
+
+    static void Initialize() {
+        if (instance) {
+            LOG_WARNING(Log, "Reinitializing logging backend");
+            return;
+        }
+        using namespace Common::FS;
+        const auto& log_dir = GetYuzuPath(YuzuPath::LogDir);
+        void(CreateDir(log_dir));
+        Filter filter;
+        filter.ParseFilterString(Settings::values.log_filter.GetValue());
+        instance = std::unique_ptr<Impl, decltype(&Deleter)>(new Impl(log_dir / LOG_FILE, filter),
+                                                             Deleter);
+        initialization_in_progress_suppress_logging = false;
    }

    Impl(const Impl&) = delete;
@@ -46,74 +189,54 @@ public:
    Impl(Impl&&) = delete;
    Impl& operator=(Impl&&) = delete;

-    void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num,
-                   const char* function, std::string message) {
-        message_queue.Push(
-            CreateEntry(log_class, log_level, filename, line_num, function, std::move(message)));
-    }
-
-    void AddBackend(std::unique_ptr<Backend> backend) {
-        std::lock_guard lock{writing_mutex};
-        backends.push_back(std::move(backend));
-    }
-
-    void RemoveBackend(std::string_view backend_name) {
-        std::lock_guard lock{writing_mutex};
-
-        std::erase_if(backends, [&backend_name](const auto& backend) {
-            return backend_name == backend->GetName();
-        });
-    }
-
-    const Filter& GetGlobalFilter() const {
-        return filter;
-    }
-
    void SetGlobalFilter(const Filter& f) {
        filter = f;
    }

-    Backend* GetBackend(std::string_view backend_name) {
-        const auto it =
-            std::find_if(backends.begin(), backends.end(),
-                         [&backend_name](const auto& i) { return backend_name == i->GetName(); });
-        if (it == backends.end())
-            return nullptr;
-        return it->get();
+    void SetColorConsoleBackendEnabled(bool enabled) {
+        color_console_backend.SetEnabled(enabled);
+    }
+
+    void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num,
+                   const char* function, std::string message) {
+        if (!filter.CheckMessage(log_class, log_level))
+            return;
+        // Hand the temporary straight to the queue so the entry is moved rather than copied.
+        message_queue.Push(
+            CreateEntry(log_class, log_level, filename, line_num, function, std::move(message)));
    }

 private:
-    Impl() {
-        backend_thread = std::thread([&] {
-            Entry entry;
-            auto write_logs = [&](Entry& e) {
-                std::lock_guard lock{writing_mutex};
-                for (const auto& backend : backends) {
-                    backend->Write(e);
-                }
-            };
-            while (true) {
-                entry = message_queue.PopWait();
-                if (entry.final_entry) {
-                    break;
-                }
-                write_logs(entry);
-            }
-
-            // Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a
-            // case where a system is repeatedly spamming logs even on close.
-            const int MAX_LOGS_TO_WRITE = filter.IsDebug() ? INT_MAX : 100;
-            int logs_written = 0;
-            while (logs_written++ < MAX_LOGS_TO_WRITE && message_queue.Pop(entry)) {
-                write_logs(entry);
-            }
-        });
-    }
+    Impl(const std::filesystem::path& file_backend_filename, const Filter& filter_)
+        : filter{filter_}, file_backend{file_backend_filename}, backend_thread{std::thread([this] {
+              Common::SetCurrentThreadName("yuzu:Log");
+              Entry entry;
+              const auto write_logs = [this, &entry]() {
+                  ForEachBackend([&entry](Backend& backend) { backend.Write(entry); });
+              };
+              while (true) {
+                  entry = message_queue.PopWait();
+                  if (entry.final_entry) {
+                      break;
+                  }
+                  write_logs();
+              }
+              // Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a
+              // case where a system is repeatedly spamming logs even on close.
+              int max_logs_to_write = filter.IsDebug() ? INT_MAX : 100;
+              while (max_logs_to_write-- && message_queue.Pop(entry)) {
+                  write_logs();
+              }
+          })} {}

    ~Impl() {
-        Entry entry;
-        entry.final_entry = true;
-        message_queue.Push(entry);
+        StopBackendThread();
+    }
+
+    void StopBackendThread() {
+        Entry stop_entry{};
+        stop_entry.final_entry = true;
+        message_queue.Push(stop_entry);
        backend_thread.join();
    }

@@ -135,100 +258,51 @@ private:
        };
    }

-    std::mutex writing_mutex;
-    std::thread backend_thread;
-    std::vector<std::unique_ptr<Backend>> backends;
-    MPSCQueue<Entry> message_queue;
+    void ForEachBackend(auto lambda) {
+        lambda(static_cast<Backend&>(debugger_backend));
+        lambda(static_cast<Backend&>(color_console_backend));
+        lambda(static_cast<Backend&>(file_backend));
+    }
+
+    static void Deleter(Impl* ptr) {
+        delete ptr;
+    }
+
+    static inline std::unique_ptr<Impl, decltype(&Deleter)> instance{nullptr, Deleter};
+
    Filter filter;
+    DebuggerBackend debugger_backend{};
+    ColorConsoleBackend color_console_backend{};
+    FileBackend file_backend;
+
+    MPSCQueue<Entry> message_queue{}; // Must precede backend_thread, which pops from it at once
+    std::thread backend_thread; // Declared after the queue and backends its thread function uses
    std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
 };
+} // namespace

-ConsoleBackend::~ConsoleBackend() = default;
-
-void ConsoleBackend::Write(const Entry& entry) {
-    PrintMessage(entry);
+void Initialize() {
+    Impl::Initialize();
 }

-ColorConsoleBackend::~ColorConsoleBackend() = default;
-
-void ColorConsoleBackend::Write(const Entry& entry) {
-    PrintColoredMessage(entry);
-}
-
-FileBackend::FileBackend(const std::filesystem::path& filename) {
-    auto old_filename = filename;
-    old_filename += ".old.txt";
-
-    // Existence checks are done within the functions themselves.
-    // We don't particularly care if these succeed or not.
-    FS::RemoveFile(old_filename);
-    void(FS::RenameFile(filename, old_filename));
-
-    file =
-        std::make_unique<FS::IOFile>(filename, FS::FileAccessMode::Write, FS::FileType::TextFile);
-}
-
-FileBackend::~FileBackend() = default;
-
-void FileBackend::Write(const Entry& entry) {
-    if (!file->IsOpen()) {
-        return;
-    }
-
-    using namespace Common::Literals;
-    // Prevent logs from exceeding a set maximum size in the event that log entries are spammed.
-    constexpr std::size_t MAX_BYTES_WRITTEN = 100_MiB;
-    constexpr std::size_t MAX_BYTES_WRITTEN_EXTENDED = 1_GiB;
-
-    const bool write_limit_exceeded =
-        bytes_written > MAX_BYTES_WRITTEN_EXTENDED ||
-        (bytes_written > MAX_BYTES_WRITTEN && !Settings::values.extended_logging);
-
-    // Close the file after the write limit is exceeded.
-    if (write_limit_exceeded) {
-        file->Close();
-        return;
-    }
-
-    bytes_written += file->WriteString(FormatLogMessage(entry).append(1, '\n'));
-    if (entry.log_level >= Level::Error) {
-        file->Flush();
-    }
-}
-
-DebuggerBackend::~DebuggerBackend() = default;
-
-void DebuggerBackend::Write(const Entry& entry) {
-#ifdef _WIN32
-    ::OutputDebugStringW(UTF8ToUTF16W(FormatLogMessage(entry).append(1, '\n')).c_str());
-#endif
+void DisableLoggingInTests() {
+    initialization_in_progress_suppress_logging = true;
 }

 void SetGlobalFilter(const Filter& filter) {
    Impl::Instance().SetGlobalFilter(filter);
 }

-void AddBackend(std::unique_ptr<Backend> backend) {
-    Impl::Instance().AddBackend(std::move(backend));
-}
-
-void RemoveBackend(std::string_view backend_name) {
-    Impl::Instance().RemoveBackend(backend_name);
-}
-
-Backend* GetBackend(std::string_view backend_name) {
-    return Impl::Instance().GetBackend(backend_name);
+void SetColorConsoleBackendEnabled(bool enabled) {
+    Impl::Instance().SetColorConsoleBackendEnabled(enabled);
 }

 void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
                       unsigned int line_num, const char* function, const char* format,
                       const fmt::format_args& args) {
-    auto& instance = Impl::Instance();
-    const auto& filter = instance.GetGlobalFilter();
-    if (!filter.CheckMessage(log_class, log_level))
-        return;
-
-    instance.PushEntry(log_class, log_level, filename, line_num, function,
-                       fmt::vformat(format, args));
+    if (!initialization_in_progress_suppress_logging) {
+        Impl::Instance().PushEntry(log_class, log_level, filename, line_num, function,
+                                   fmt::vformat(format, args));
+    }
 }
 } // namespace Common::Log
--- a/src/common/logging/backend.h
+++ b/src/common/logging/backend.h
@@ -5,120 +5,21 @@
 #pragma once

 #include <filesystem>
-#include <memory>
-#include <string>
-#include <string_view>
 #include "common/logging/filter.h"
-#include "common/logging/log.h"
-
-namespace Common::FS {
-class IOFile;
-}

 namespace Common::Log {

 class Filter;

-/**
- * Interface for logging backends. As loggers can be created and removed at runtime, this can be
- * used by a frontend for adding a custom logging backend as needed
- */
-class Backend {
-public:
-    virtual ~Backend() = default;
+/// Initializes the logging system. This should be the first thing called in main.
+void Initialize();

-    virtual void SetFilter(const Filter& new_filter) {
-        filter = new_filter;
-    }
-    virtual const char* GetName() const = 0;
-    virtual void Write(const Entry& entry) = 0;
-
-private:
-    Filter filter;
-};
+void DisableLoggingInTests();

 /**
- * Backend that writes to stderr without any color commands
- */
-class ConsoleBackend : public Backend {
-public:
-    ~ConsoleBackend() override;
-
-    static const char* Name() {
-        return "console";
-    }
-    const char* GetName() const override {
-        return Name();
-    }
-    void Write(const Entry& entry) override;
-};
-
-/**
- * Backend that writes to stderr and with color
- */
-class ColorConsoleBackend : public Backend {
-public:
-    ~ColorConsoleBackend() override;
-
-    static const char* Name() {
-        return "color_console";
-    }
-
-    const char* GetName() const override {
-        return Name();
-    }
-    void Write(const Entry& entry) override;
-};
-
-/**
- * Backend that writes to a file passed into the constructor
- */
-class FileBackend : public Backend {
-public:
-    explicit FileBackend(const std::filesystem::path& filename);
-    ~FileBackend() override;
-
-    static const char* Name() {
-        return "file";
-    }
-
-    const char* GetName() const override {
-        return Name();
-    }
-
-    void Write(const Entry& entry) override;
-
-private:
-    std::unique_ptr<FS::IOFile> file;
-    std::size_t bytes_written = 0;
-};
-
-/**
- * Backend that writes to Visual Studio's output window
- */
-class DebuggerBackend : public Backend {
-public:
-    ~DebuggerBackend() override;
-
-    static const char* Name() {
-        return "debugger";
-    }
-    const char* GetName() const override {
-        return Name();
-    }
-    void Write(const Entry& entry) override;
-};
-
-void AddBackend(std::unique_ptr<Backend> backend);
-
-void RemoveBackend(std::string_view backend_name);
-
-Backend* GetBackend(std::string_view backend_name);
-
-/**
- * The global filter will prevent any messages from even being processed if they are filtered. Each
- * backend can have a filter, but if the level is lower than the global filter, the backend will
- * never get the message
+ * The global filter will prevent any messages from even being processed if they are filtered.
 */
 void SetGlobalFilter(const Filter& filter);
-} // namespace Common::Log
+
+void SetColorConsoleBackendEnabled(bool enabled);
+} // namespace Common::Log
--- a/src/common/logging/filter.cpp
+++ b/src/common/logging/filter.cpp
@@ -111,6 +111,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) {
    SUB(Service, NCM)                                                                              \
    SUB(Service, NFC)                                                                              \
    SUB(Service, NFP)                                                                              \
+    SUB(Service, NGCT)                                                                             \
    SUB(Service, NIFM)                                                                             \
    SUB(Service, NIM)                                                                              \
    SUB(Service, NPNS)                                                                             \
--- a/src/common/logging/types.h
+++ b/src/common/logging/types.h
@@ -81,6 +81,7 @@ enum class Class : u8 {
    Service_NCM,       ///< The NCM service
    Service_NFC,       ///< The NFC (Near-field communication) service
    Service_NFP,       ///< The NFP service
+    Service_NGCT,      ///< The NGCT (No Good Content for Terra) service
    Service_NIFM,      ///< The NIFM (Network interface) service
    Service_NIM,       ///< The NIM service
    Service_NPNS,      ///< The NPNS service
--- a/src/common/lru_cache.h
+++ b/src/common/lru_cache.h
@@ -0,0 +1,140 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2+ or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <deque>
+#include <memory>
+#include <type_traits>
+
+#include "common/common_types.h"
+
+namespace Common {
+
+template <class Traits>
+class LeastRecentlyUsedCache {
+    using ObjectType = typename Traits::ObjectType;
+    using TickType = typename Traits::TickType;
+
+    struct Item {
+        ObjectType obj;
+        TickType tick;
+        Item* next{};
+        Item* prev{};
+    };
+
+public:
+    LeastRecentlyUsedCache() : first_item{}, last_item{} {}
+    ~LeastRecentlyUsedCache() = default;
+
+    size_t Insert(ObjectType obj, TickType tick) {
+        const auto new_id = Build();
+        auto& item = item_pool[new_id];
+        item.obj = obj;
+        item.tick = tick;
+        Attach(item);
+        return new_id;
+    }
+
+    void Touch(size_t id, TickType tick) {
+        auto& item = item_pool[id];
+        if (item.tick >= tick) {
+            return;
+        }
+        item.tick = tick;
+        if (&item == last_item) {
+            return;
+        }
+        Detach(item);
+        Attach(item);
+    }
+
+    void Free(size_t id) {
+        auto& item = item_pool[id];
+        Detach(item);
+        item.prev = nullptr;
+        item.next = nullptr;
+        free_items.push_back(id);
+    }
+
+    /// Calls func on items, oldest first, until one is newer than tick or func returns true.
+    template <typename Func>
+    void ForEachItemBelow(TickType tick, Func&& func) {
+        static constexpr bool RETURNS_BOOL =
+            std::is_same_v<std::invoke_result_t<Func&, ObjectType&>, bool>;
+        for (Item* iterator = first_item; iterator != nullptr;) {
+            if (static_cast<s64>(tick) - static_cast<s64>(iterator->tick) < 0) {
+                return;
+            }
+            Item* next = iterator->next; // Saved first: Free() on this item clears its links
+            if constexpr (RETURNS_BOOL) {
+                if (func(iterator->obj)) {
+                    return;
+                }
+            } else {
+                func(iterator->obj);
+            }
+            iterator = next;
+        }
+    }
+
+private:
+    size_t Build() {
+        if (free_items.empty()) {
+            const size_t item_id = item_pool.size();
+            auto& item = item_pool.emplace_back();
+            item.next = nullptr;
+            item.prev = nullptr;
+            return item_id;
+        }
+        const size_t item_id = free_items.front();
+        free_items.pop_front();
+        auto& item = item_pool[item_id];
+        item.next = nullptr;
+        item.prev = nullptr;
+        return item_id;
+    }
+
+    void Attach(Item& item) {
+        if (!first_item) {
+            first_item = &item;
+        }
+        if (!last_item) {
+            last_item = &item;
+        } else {
+            item.prev = last_item;
+            last_item->next = &item;
+            item.next = nullptr;
+            last_item = &item;
+        }
+    }
+
+    void Detach(Item& item) {
+        if (item.prev) {
+            item.prev->next = item.next;
+        }
+        if (item.next) {
+            item.next->prev = item.prev;
+        }
+        if (&item == first_item) {
+            first_item = item.next;
+            if (first_item) {
+                first_item->prev = nullptr;
+            }
+        }
+        if (&item == last_item) {
+            last_item = item.prev;
+            if (last_item) {
+                last_item->next = nullptr;
+            }
+        }
+    }
+
+    std::deque<Item> item_pool;
+    std::deque<size_t> free_items;
+    Item* first_item{};
+    Item* last_item{};
+};
+
+} // namespace Common
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -54,12 +54,11 @@ void LogSettings() {
    log_setting("Renderer_GPUAccuracyLevel", values.gpu_accuracy.GetValue());
    log_setting("Renderer_UseAsynchronousGpuEmulation",
                values.use_asynchronous_gpu_emulation.GetValue());
-    log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue());
+    log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue());
    log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue());
    log_setting("Renderer_UseVsync", values.use_vsync.GetValue());
    log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue());
    log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue());
-    log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue());
    log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue());
    log_setting("Audio_OutputEngine", values.sink_id.GetValue());
    log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue());
@@ -137,13 +136,12 @@ void RestoreGlobalState(bool is_powered_on) {
    values.use_disk_shader_cache.SetGlobal(true);
    values.gpu_accuracy.SetGlobal(true);
    values.use_asynchronous_gpu_emulation.SetGlobal(true);
-    values.use_nvdec_emulation.SetGlobal(true);
+    values.nvdec_emulation.SetGlobal(true);
    values.accelerate_astc.SetGlobal(true);
    values.use_vsync.SetGlobal(true);
    values.shader_backend.SetGlobal(true);
    values.use_asynchronous_shaders.SetGlobal(true);
    values.use_fast_gpu_time.SetGlobal(true);
-    values.use_caches_gc.SetGlobal(true);
    values.bg_red.SetGlobal(true);
    values.bg_green.SetGlobal(true);
    values.bg_blue.SetGlobal(true);
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -4,6 +4,7 @@

 #pragma once

+#include <algorithm>
 #include <array>
 #include <atomic>
 #include <chrono>
@@ -47,6 +48,12 @@ enum class FullscreenMode : u32 {
    Exclusive = 1,
 };

+enum class NvdecEmulation : u32 {
+    Off = 0,
+    CPU = 1,
+    GPU = 2,
+};
+
 /** The BasicSetting class is a simple resource manager. It defines a label and default value
 * alongside the actual value of the setting for simpler and less-error prone use with frontend
 * configurations. Setting a default value and label is required, though subclasses may deviate from
@@ -74,14 +81,14 @@ public:
     */
    explicit BasicSetting(const Type& default_val, const std::string& name)
        : default_value{default_val}, global{default_val}, label{name} {}
-    ~BasicSetting() = default;
+    virtual ~BasicSetting() = default;

    /**
     *  Returns a reference to the setting's value.
     *
     * @returns A reference to the setting
     */
-    [[nodiscard]] const Type& GetValue() const {
+    [[nodiscard]] virtual const Type& GetValue() const {
        return global;
    }

@@ -90,7 +97,7 @@ public:
     *
     * @param value The desired value
     */
-    void SetValue(const Type& value) {
+    virtual void SetValue(const Type& value) {
        Type temp{value};
        std::swap(global, temp);
    }
@@ -120,7 +127,7 @@ public:
     *
     * @returns A reference to the setting
     */
-    const Type& operator=(const Type& value) {
+    virtual const Type& operator=(const Type& value) {
        Type temp{value};
        std::swap(global, temp);
        return global;
@@ -131,7 +138,7 @@ public:
     *
     * @returns A reference to the setting
     */
-    explicit operator const Type&() const {
+    explicit virtual operator const Type&() const {
        return global;
    }

@@ -141,6 +148,51 @@ protected:
    const std::string label{};  ///< The setting's label
 };

+/**
+ * BasicRangedSetting class is intended for use with quantifiable settings that need a more
+ * restrictive range than implicitly defined by its type. Implements a minimum and maximum that is
+ * simply used to sanitize SetValue and the assignment overload.
+ */
+template <typename Type>
+class BasicRangedSetting : virtual public BasicSetting<Type> {
+public:
+    /**
+     * Sets a default value, minimum value, maximum value, and label.
+     *
+     * @param default_val Initial value of the setting, and default value of the setting
+     * @param min_val Sets the minimum allowed value of the setting
+     * @param max_val Sets the maximum allowed value of the setting
+     * @param name Label for the setting
+     */
+    explicit BasicRangedSetting(const Type& default_val, const Type& min_val, const Type& max_val,
+                                const std::string& name)
+        : BasicSetting<Type>{default_val, name}, minimum{min_val}, maximum{max_val} {}
+    virtual ~BasicRangedSetting() = default;
+
+    /**
+     * Like BasicSetting's SetValue, except value is clamped to the range of the setting.
+     *
+     * @param value The desired value
+     */
+    void SetValue(const Type& value) override {
+        this->global = std::clamp(value, minimum, maximum);
+    }
+
+    /**
+     * Like BasicSetting's assignment overload, except value is clamped to the range of the setting.
+     *
+     * @param value The desired value
+     * @returns A reference to the setting's value
+     */
+    const Type& operator=(const Type& value) override {
+        this->global = std::clamp(value, minimum, maximum);
+        return this->global;
+    }
+
+    const Type minimum; ///< Minimum allowed value of the setting
+    const Type maximum; ///< Maximum allowed value of the setting
+};
+
 /**
 * The Setting class is a slightly more complex version of the BasicSetting class. This adds a
 * custom setting to switch to when a guest application specifically requires it. The effect is that
@@ -152,7 +204,7 @@ protected:
 * Like the BasicSetting, this requires setting a default value and label to use.
 */
 template <typename Type>
-class Setting final : public BasicSetting<Type> {
+class Setting : virtual public BasicSetting<Type> {
 public:
    /**
     * Sets a default value, label, and setting value.
@@ -162,7 +214,7 @@ public:
     */
    explicit Setting(const Type& default_val, const std::string& name)
        : BasicSetting<Type>(default_val, name) {}
-    ~Setting() = default;
+    virtual ~Setting() = default;

    /**
     * Tells this setting to represent either the global or custom setting when other member
@@ -191,7 +243,13 @@ public:
     *
     * @returns The required value of the setting
     */
-    [[nodiscard]] const Type& GetValue(bool need_global = false) const {
+    [[nodiscard]] virtual const Type& GetValue() const override {
+        if (use_global) {
+            return this->global;
+        }
+        return custom;
+    }
+    [[nodiscard]] virtual const Type& GetValue(bool need_global) const {
        if (use_global || need_global) {
            return this->global;
        }
@@ -203,7 +261,7 @@ public:
     *
     * @param value The new value
     */
-    void SetValue(const Type& value) {
+    void SetValue(const Type& value) override {
        Type temp{value};
        if (use_global) {
            std::swap(this->global, temp);
@@ -219,7 +277,7 @@ public:
     *
     * @returns A reference to the current setting value
     */
-    const Type& operator=(const Type& value) {
+    const Type& operator=(const Type& value) override {
        Type temp{value};
        if (use_global) {
            std::swap(this->global, temp);
@@ -234,18 +292,87 @@ public:
     *
     * @returns A reference to the current setting value
     */
-    explicit operator const Type&() const {
+    virtual explicit operator const Type&() const override {
        if (use_global) {
            return this->global;
        }
        return custom;
    }

-private:
+protected:
    bool use_global{true}; ///< The setting's global state
    Type custom{};         ///< The custom value of the setting
 };

+/**
+ * RangedSetting is a Setting that implements a maximum and minimum value for its setting. Intended
+ * for use with quantifiable settings.
+ */
+template <typename Type>
+class RangedSetting final : public BasicRangedSetting<Type>, public Setting<Type> {
+public:
+    /**
+     * Sets a default value, minimum value, maximum value, and label.
+     *
+     * @param default_val Initial value of the setting, and default value of the setting
+     * @param min_val Sets the minimum allowed value of the setting
+     * @param max_val Sets the maximum allowed value of the setting
+     * @param name Label for the setting
+     */
+    explicit RangedSetting(const Type& default_val, const Type& min_val, const Type& max_val,
+                           const std::string& name)
+        : BasicSetting<Type>{default_val, name},
+          BasicRangedSetting<Type>{default_val, min_val, max_val, name}, Setting<Type>{default_val,
+                                                                                       name} {}
+    virtual ~RangedSetting() = default;
+
+    // The following are needed to avoid a MSVC bug
+    // (source: https://stackoverflow.com/questions/469508)
+    [[nodiscard]] const Type& GetValue() const override {
+        return Setting<Type>::GetValue();
+    }
+    [[nodiscard]] const Type& GetValue(bool need_global) const override {
+        return Setting<Type>::GetValue(need_global);
+    }
+    explicit operator const Type&() const override {
+        if (this->use_global) {
+            return this->global;
+        }
+        return this->custom;
+    }
+
+    /**
+     * Like BasicSetting's SetValue, except value is clamped to the range of the setting. Sets the
+     * appropriate value depending on the global state.
+     *
+     * @param value The desired value
+     */
+    void SetValue(const Type& value) override {
+        const Type temp = std::clamp(value, this->minimum, this->maximum);
+        // Write only the slot selected by the global state, matching operator= and
+        // Setting::SetValue; setting the global value must not clobber the custom one.
+        Type& target = this->use_global ? this->global : this->custom;
+        target = temp;
+    }
+
+    /**
+     * Like BasicSetting's assignment overload, except value is clamped to the range of the setting.
+     * Uses the appropriate value depending on the global state.
+     *
+     * @param value The desired value
+     * @returns A reference to the setting's value
+     */
+    const Type& operator=(const Type& value) override {
+        const Type temp = std::clamp(value, this->minimum, this->maximum);
+        if (this->use_global) {
+            this->global = temp;
+            return this->global;
+        }
+        this->custom = temp;
+        return this->custom;
+    }
+};
+
 /**
 * The InputSetting class allows for getting a reference to either the global or custom members.
 * This is required as we cannot easily modify the values of user-defined types within containers
@@ -289,13 +416,14 @@ struct Values {
    BasicSetting<std::string> sink_id{"auto", "output_engine"};
    BasicSetting<bool> audio_muted{false, "audio_muted"};
    Setting<bool> enable_audio_stretching{true, "enable_audio_stretching"};
-    Setting<u8> volume{100, "volume"};
+    RangedSetting<u8> volume{100, 0, 100, "volume"};

    // Core
    Setting<bool> use_multi_core{true, "use_multi_core"};

    // Cpu
-    Setting<CPUAccuracy> cpu_accuracy{CPUAccuracy::Auto, "cpu_accuracy"};
+    RangedSetting<CPUAccuracy> cpu_accuracy{CPUAccuracy::Auto, CPUAccuracy::Auto,
+                                            CPUAccuracy::Unsafe, "cpu_accuracy"};
    // TODO: remove cpu_accuracy_first_time, migration setting added 8 July 2021
    BasicSetting<bool> cpu_accuracy_first_time{true, "cpu_accuracy_first_time"};
    BasicSetting<bool> cpu_debug_mode{false, "cpu_debug_mode"};
@@ -317,7 +445,8 @@ struct Values {
    Setting<bool> cpuopt_unsafe_fastmem_check{true, "cpuopt_unsafe_fastmem_check"};

    // Renderer
-    Setting<RendererBackend> renderer_backend{RendererBackend::OpenGL, "backend"};
+    RangedSetting<RendererBackend> renderer_backend{
+        RendererBackend::OpenGL, RendererBackend::OpenGL, RendererBackend::Vulkan, "backend"};
    BasicSetting<bool> renderer_debug{false, "debug"};
    BasicSetting<bool> renderer_shader_feedback{false, "shader_feedback"};
    BasicSetting<bool> enable_nsight_aftermath{false, "nsight_aftermath"};
@@ -328,29 +457,30 @@ struct Values {
    Setting<u16> resolution_factor{1, "resolution_factor"};
    // *nix platforms may have issues with the borderless windowed fullscreen mode.
    // Default to exclusive fullscreen on these platforms for now.
-    Setting<FullscreenMode> fullscreen_mode{
+    RangedSetting<FullscreenMode> fullscreen_mode{
 #ifdef _WIN32
        FullscreenMode::Borderless,
 #else
        FullscreenMode::Exclusive,
 #endif
-        "fullscreen_mode"};
-    Setting<int> aspect_ratio{0, "aspect_ratio"};
-    Setting<int> max_anisotropy{0, "max_anisotropy"};
+        FullscreenMode::Borderless, FullscreenMode::Exclusive, "fullscreen_mode"};
+    RangedSetting<int> aspect_ratio{0, 0, 3, "aspect_ratio"};
+    RangedSetting<int> max_anisotropy{0, 0, 4, "max_anisotropy"};
    Setting<bool> use_speed_limit{true, "use_speed_limit"};
-    Setting<u16> speed_limit{100, "speed_limit"};
+    RangedSetting<u16> speed_limit{100, 0, 9999, "speed_limit"};
    Setting<bool> use_disk_shader_cache{true, "use_disk_shader_cache"};
-    Setting<GPUAccuracy> gpu_accuracy{GPUAccuracy::High, "gpu_accuracy"};
+    RangedSetting<GPUAccuracy> gpu_accuracy{GPUAccuracy::High, GPUAccuracy::Normal,
+                                            GPUAccuracy::Extreme, "gpu_accuracy"};
    Setting<bool> use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"};
-    Setting<bool> use_nvdec_emulation{true, "use_nvdec_emulation"};
+    Setting<NvdecEmulation> nvdec_emulation{NvdecEmulation::GPU, "nvdec_emulation"};
    Setting<bool> accelerate_astc{true, "accelerate_astc"};
    Setting<bool> use_vsync{true, "use_vsync"};
-    BasicSetting<u16> fps_cap{1000, "fps_cap"};
+    BasicRangedSetting<u16> fps_cap{1000, 1, 1000, "fps_cap"};
    BasicSetting<bool> disable_fps_limit{false, "disable_fps_limit"};
-    Setting<ShaderBackend> shader_backend{ShaderBackend::GLASM, "shader_backend"};
+    RangedSetting<ShaderBackend> shader_backend{ShaderBackend::GLASM, ShaderBackend::GLSL,
+                                                ShaderBackend::SPIRV, "shader_backend"};
    Setting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"};
    Setting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"};
-    Setting<bool> use_caches_gc{false, "use_caches_gc"};

    Setting<u8> bg_red{0, "bg_red"};
    Setting<u8> bg_green{0, "bg_green"};
@@ -364,10 +494,10 @@ struct Values {
    std::chrono::seconds custom_rtc_differential;

    BasicSetting<s32> current_user{0, "current_user"};
-    Setting<s32> language_index{1, "language_index"};
-    Setting<s32> region_index{1, "region_index"};
-    Setting<s32> time_zone_index{0, "time_zone_index"};
-    Setting<s32> sound_index{1, "sound_index"};
+    RangedSetting<s32> language_index{1, 0, 17, "language_index"};
+    RangedSetting<s32> region_index{1, 0, 6, "region_index"};
+    RangedSetting<s32> time_zone_index{0, 0, 45, "time_zone_index"};
+    RangedSetting<s32> sound_index{1, 0, 2, "sound_index"};

    // Controls
    InputSetting<std::array<PlayerInput, 10>> players;
@@ -384,7 +514,7 @@ struct Values {
                                                "udp_input_servers"};

    BasicSetting<bool> mouse_panning{false, "mouse_panning"};
-    BasicSetting<u8> mouse_panning_sensitivity{10, "mouse_panning_sensitivity"};
+    BasicRangedSetting<u8> mouse_panning_sensitivity{10, 1, 100, "mouse_panning_sensitivity"};
    BasicSetting<bool> mouse_enabled{false, "mouse_enabled"};
    std::string mouse_device;
    MouseButtonsRaw mouse_buttons;
@@ -433,9 +563,10 @@ struct Values {
    BasicSetting<std::string> log_filter{"*:Info", "log_filter"};
    BasicSetting<bool> use_dev_keys{false, "use_dev_keys"};

-    // Services
+    // Network
    BasicSetting<std::string> bcat_backend{"none", "bcat_backend"};
    BasicSetting<bool> bcat_boxcat_local{false, "bcat_boxcat_local"};
+    BasicSetting<std::string> network_interface{std::string(), "network_interface"};

    // WebService
    BasicSetting<bool> enable_telemetry{true, "enable_telemetry"};
--- a/src/common/threadsafe_queue.h
+++ b/src/common/threadsafe_queue.h
@@ -46,15 +46,13 @@ public:
        ElementPtr* new_ptr = new ElementPtr();
        write_ptr->next.store(new_ptr, std::memory_order_release);
        write_ptr = new_ptr;
+        ++size;

-        const size_t previous_size{size++};
-
-        // Acquire the mutex and then immediately release it as a fence.
+        // cv_mutex must be held or else there will be a missed wakeup if the other thread is in the
+        // line before cv.wait
        // TODO(bunnei): This can be replaced with C++20 waitable atomics when properly supported.
        // See discussion on https://github.com/yuzu-emu/yuzu/pull/3173 for details.
-        if (previous_size == 0) {
-            std::lock_guard lock{cv_mutex};
-        }
+        std::lock_guard lock{cv_mutex};
        cv.notify_one();
    }

--- a/src/common/uuid.h
+++ b/src/common/uuid.h
@@ -69,3 +69,14 @@ struct UUID {
 static_assert(sizeof(UUID) == 16, "UUID is an invalid size!");

 } // namespace Common
+
+namespace std {
+
+template <>
+struct hash<Common::UUID> {
+    size_t operator()(const Common::UUID& uuid) const noexcept {
+        return uuid.uuid[1] ^ uuid.uuid[0];
+    }
+};
+
+} // namespace std
--- a/src/common/x64/xbyak_abi.h
+++ b/src/common/x64/xbyak_abi.h
@@ -6,7 +6,7 @@

 #include <bitset>
 #include <initializer_list>
-#include <xbyak.h>
+#include <xbyak/xbyak.h>
 #include "common/assert.h"

 namespace Common::X64 {
--- a/src/common/x64/xbyak_util.h
+++ b/src/common/x64/xbyak_util.h
@@ -5,7 +5,7 @@
 #pragma once

 #include <type_traits>
-#include <xbyak.h>
+#include <xbyak/xbyak.h>
 #include "common/x64/xbyak_abi.h"

 namespace Common::X64 {
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -452,6 +452,8 @@ add_library(core STATIC
    hle/service/nfp/nfp.h
    hle/service/nfp/nfp_user.cpp
    hle/service/nfp/nfp_user.h
+    hle/service/ngct/ngct.cpp
+    hle/service/ngct/ngct.h
    hle/service/nifm/nifm.cpp
    hle/service/nifm/nifm.h
    hle/service/nim/nim.cpp
@@ -636,6 +638,8 @@ add_library(core STATIC
    memory.h
    network/network.cpp
    network/network.h
+    network/network_interface.cpp
+    network/network_interface.h
    network/sockets.h
    perf_stats.cpp
    perf_stats.h
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -4,6 +4,8 @@

 #include <array>
 #include <atomic>
+#include <exception>
 #include <memory>
+#include <stdexcept>
 #include <utility>

@@ -84,8 +85,6 @@ FileSys::StorageId GetStorageIdForFrontendSlot(

 } // Anonymous namespace

-/*static*/ System System::s_instance;
-
 FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
                                         const std::string& path) {
    // To account for split 00+01+etc files.
@@ -425,6 +424,20 @@ struct System::Impl {
 System::System() : impl{std::make_unique<Impl>(*this)} {}
 System::~System() = default;

+System& System::GetInstance() {
+    if (!s_instance) {
+        throw std::runtime_error("Using System instance before its initialization");
+    }
+    return *s_instance;
+}
+
+void System::InitializeGlobalInstance() {
+    if (s_instance) {
+        throw std::runtime_error("Reinitializing Global System instance.");
+    }
+    s_instance = std::unique_ptr<System>(new System);
+}
+
 CpuManager& System::GetCpuManager() {
    return impl->cpu_manager;
 }
@@ -494,6 +507,12 @@ const ARM_Interface& System::CurrentArmInterface() const {
    return impl->kernel.CurrentPhysicalCore().ArmInterface();
 }

+std::size_t System::CurrentCoreIndex() const {
+    std::size_t core = impl->kernel.GetCurrentHostThreadID();
+    ASSERT(core < Core::Hardware::NUM_CPU_CORES);
+    return core;
+}
+
 Kernel::PhysicalCore& System::CurrentPhysicalCore() {
    return impl->kernel.CurrentPhysicalCore();
 }
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -120,9 +120,9 @@ public:
     * Gets the instance of the System singleton class.
     * @returns Reference to the instance of the System singleton class.
     */
-    [[deprecated("Use of the global system instance is deprecated")]] static System& GetInstance() {
-        return s_instance;
-    }
+    [[deprecated("Use of the global system instance is deprecated")]] static System& GetInstance();
+
+    static void InitializeGlobalInstance();

    /// Enumeration representing the return values of the System Initialize and Load process.
    enum class ResultStatus : u32 {
@@ -205,6 +205,9 @@ public:
    /// Gets an ARM interface to the CPU core that is currently running
    [[nodiscard]] const ARM_Interface& CurrentArmInterface() const;

+    /// Gets the index of the currently running CPU core
+    [[nodiscard]] std::size_t CurrentCoreIndex() const;
+
    /// Gets the physical core for the CPU core that is currently running
    [[nodiscard]] Kernel::PhysicalCore& CurrentPhysicalCore();

@@ -393,7 +396,7 @@ private:
    struct Impl;
    std::unique_ptr<Impl> impl;

-    static System s_instance;
+    inline static std::unique_ptr<System> s_instance{};
 };

 } // namespace Core
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -118,18 +118,17 @@ void CpuManager::MultiCoreRunGuestLoop() {
            physical_core = &kernel.CurrentPhysicalCore();
        }
        system.ExitDynarmicProfile();
-        {
-            Kernel::KScopedDisableDispatch dd(kernel);
-            physical_core->ArmInterface().ClearExclusiveState();
-        }
+        physical_core->ArmInterface().ClearExclusiveState();
+        kernel.CurrentScheduler()->RescheduleCurrentCore();
    }
 }

 void CpuManager::MultiCoreRunIdleThread() {
    auto& kernel = system.Kernel();
    while (true) {
-        Kernel::KScopedDisableDispatch dd(kernel);
-        kernel.CurrentPhysicalCore().Idle();
+        auto& physical_core = kernel.CurrentPhysicalCore();
+        physical_core.Idle();
+        kernel.CurrentScheduler()->RescheduleCurrentCore();
    }
 }

@@ -137,12 +136,12 @@ void CpuManager::MultiCoreRunSuspendThread() {
    auto& kernel = system.Kernel();
    kernel.CurrentScheduler()->OnThreadStart();
    while (true) {
-        auto core = kernel.CurrentPhysicalCoreIndex();
+        auto core = kernel.GetCurrentHostThreadID();
        auto& scheduler = *kernel.CurrentScheduler();
        Kernel::KThread* current_thread = scheduler.GetCurrentThread();
        Common::Fiber::YieldTo(current_thread->GetHostContext(), *core_data[core].host_context);
        ASSERT(scheduler.ContextSwitchPending());
-        ASSERT(core == kernel.CurrentPhysicalCoreIndex());
+        ASSERT(core == kernel.GetCurrentHostThreadID());
        scheduler.RescheduleCurrentCore();
    }
 }
@@ -348,11 +347,15 @@ void CpuManager::RunThread(std::stop_token stop_token, std::size_t core) {
            sc_sync_first_use = false;
        }

-        // Emulation was stopped
-        if (stop_token.stop_requested()) {
+        // Abort if emulation was killed before the session really starts
+        if (!system.IsPoweredOn()) {
            return;
        }

+        if (stop_token.stop_requested()) {
+            break;
+        }
+
        auto current_thread = system.Kernel().CurrentScheduler()->GetCurrentThread();
        data.is_running = true;
        Common::Fiber::YieldTo(data.host_context, *current_thread->GetHostContext());
--- a/src/core/hle/kernel/k_address_arbiter.cpp
+++ b/src/core/hle/kernel/k_address_arbiter.cpp
@@ -28,7 +28,7 @@ bool ReadFromUser(Core::System& system, s32* out, VAddr address) {

 bool DecrementIfLessThan(Core::System& system, s32* out, VAddr address, s32 value) {
    auto& monitor = system.Monitor();
-    const auto current_core = system.Kernel().CurrentPhysicalCoreIndex();
+    const auto current_core = system.CurrentCoreIndex();

    // TODO(bunnei): We should disable interrupts here via KScopedInterruptDisable.
    // TODO(bunnei): We should call CanAccessAtomic(..) here.
@@ -58,7 +58,7 @@ bool DecrementIfLessThan(Core::System& system, s32* out, VAddr address, s32 valu

 bool UpdateIfEqual(Core::System& system, s32* out, VAddr address, s32 value, s32 new_value) {
    auto& monitor = system.Monitor();
-    const auto current_core = system.Kernel().CurrentPhysicalCoreIndex();
+    const auto current_core = system.CurrentCoreIndex();

    // TODO(bunnei): We should disable interrupts here via KScopedInterruptDisable.
    // TODO(bunnei): We should call CanAccessAtomic(..) here.
--- a/src/core/hle/kernel/k_auto_object.h
+++ b/src/core/hle/kernel/k_auto_object.h
@@ -170,10 +170,6 @@ public:
        }
    }

-    const std::string& GetName() const {
-        return name;
-    }
-
 private:
    void RegisterWithKernel();
    void UnregisterWithKernel();
--- a/src/core/hle/kernel/k_condition_variable.cpp
+++ b/src/core/hle/kernel/k_condition_variable.cpp
@@ -35,7 +35,7 @@ bool WriteToUser(Core::System& system, VAddr address, const u32* p) {
 bool UpdateLockAtomic(Core::System& system, u32* out, VAddr address, u32 if_zero,
                      u32 new_orr_mask) {
    auto& monitor = system.Monitor();
-    const auto current_core = system.Kernel().CurrentPhysicalCoreIndex();
+    const auto current_core = system.CurrentCoreIndex();

    // Load the value from the address.
    const auto expected = monitor.ExclusiveRead32(current_core, address);
--- a/src/core/hle/kernel/k_handle_table.cpp
+++ b/src/core/hle/kernel/k_handle_table.cpp
@@ -13,7 +13,6 @@ ResultCode KHandleTable::Finalize() {
    // Get the table and clear our record of it.
    u16 saved_table_size = 0;
    {
-        KScopedDisableDispatch dd(kernel);
        KScopedSpinLock lk(m_lock);

        std::swap(m_table_size, saved_table_size);
@@ -44,7 +43,6 @@ bool KHandleTable::Remove(Handle handle) {
    // Find the object and free the entry.
    KAutoObject* obj = nullptr;
    {
-        KScopedDisableDispatch dd(kernel);
        KScopedSpinLock lk(m_lock);

        if (this->IsValidHandle(handle)) {
@@ -63,7 +61,6 @@ bool KHandleTable::Remove(Handle handle) {
 }

 ResultCode KHandleTable::Add(Handle* out_handle, KAutoObject* obj, u16 type) {
-    KScopedDisableDispatch dd(kernel);
    KScopedSpinLock lk(m_lock);

    // Never exceed our capacity.
@@ -86,7 +83,6 @@ ResultCode KHandleTable::Add(Handle* out_handle, KAutoObject* obj, u16 type) {
 }

 ResultCode KHandleTable::Reserve(Handle* out_handle) {
-    KScopedDisableDispatch dd(kernel);
    KScopedSpinLock lk(m_lock);

    // Never exceed our capacity.
@@ -97,7 +93,6 @@ ResultCode KHandleTable::Reserve(Handle* out_handle) {
 }

 void KHandleTable::Unreserve(Handle handle) {
-    KScopedDisableDispatch dd(kernel);
    KScopedSpinLock lk(m_lock);

    // Unpack the handle.
@@ -116,7 +111,6 @@ void KHandleTable::Unreserve(Handle handle) {
 }

 void KHandleTable::Register(Handle handle, KAutoObject* obj, u16 type) {
-    KScopedDisableDispatch dd(kernel);
    KScopedSpinLock lk(m_lock);

    // Unpack the handle.
--- a/src/core/hle/kernel/k_handle_table.h
+++ b/src/core/hle/kernel/k_handle_table.h
@@ -69,7 +69,6 @@ public:
    template <typename T = KAutoObject>
    KScopedAutoObject<T> GetObjectWithoutPseudoHandle(Handle handle) const {
        // Lock and look up in table.
-        KScopedDisableDispatch dd(kernel);
        KScopedSpinLock lk(m_lock);

        if constexpr (std::is_same_v<T, KAutoObject>) {
@@ -124,7 +123,6 @@ public:
        size_t num_opened;
        {
            // Lock the table.
-            KScopedDisableDispatch dd(kernel);
            KScopedSpinLock lk(m_lock);
            for (num_opened = 0; num_opened < num_handles; num_opened++) {
                // Get the current handle.
--- a/src/core/hle/kernel/k_process.cpp
+++ b/src/core/hle/kernel/k_process.cpp
@@ -59,7 +59,6 @@ void SetupMainThread(Core::System& system, KProcess& owner_process, u32 priority
    thread->GetContext64().cpu_registers[0] = 0;
    thread->GetContext32().cpu_registers[1] = thread_handle;
    thread->GetContext64().cpu_registers[1] = thread_handle;
-    thread->DisableDispatch();

    auto& kernel = system.Kernel();
    // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires
--- a/src/core/hle/kernel/k_scheduler.cpp
+++ b/src/core/hle/kernel/k_scheduler.cpp
@@ -376,18 +376,20 @@ void KScheduler::ClearSchedulerUpdateNeeded(KernelCore& kernel) {
 }

 void KScheduler::DisableScheduling(KernelCore& kernel) {
-    ASSERT(GetCurrentThreadPointer(kernel)->GetDisableDispatchCount() >= 0);
-    GetCurrentThreadPointer(kernel)->DisableDispatch();
+    if (auto* scheduler = kernel.CurrentScheduler(); scheduler) {
+        ASSERT(scheduler->GetCurrentThread()->GetDisableDispatchCount() >= 0);
+        scheduler->GetCurrentThread()->DisableDispatch();
+    }
 }

 void KScheduler::EnableScheduling(KernelCore& kernel, u64 cores_needing_scheduling) {
-    ASSERT(GetCurrentThreadPointer(kernel)->GetDisableDispatchCount() >= 1);
-
-    if (GetCurrentThreadPointer(kernel)->GetDisableDispatchCount() > 1) {
-        GetCurrentThreadPointer(kernel)->EnableDispatch();
-    } else {
-        RescheduleCores(kernel, cores_needing_scheduling);
+    if (auto* scheduler = kernel.CurrentScheduler(); scheduler) {
+        ASSERT(scheduler->GetCurrentThread()->GetDisableDispatchCount() >= 1);
+        if (scheduler->GetCurrentThread()->GetDisableDispatchCount() >= 1) {
+            scheduler->GetCurrentThread()->EnableDispatch();
+        }
    }
+    RescheduleCores(kernel, cores_needing_scheduling);
 }

 u64 KScheduler::UpdateHighestPriorityThreads(KernelCore& kernel) {
@@ -615,17 +617,13 @@ KScheduler::KScheduler(Core::System& system_, s32 core_id_) : system{system_}, c
    state.highest_priority_thread = nullptr;
 }

-void KScheduler::Finalize() {
+KScheduler::~KScheduler() {
    if (idle_thread) {
        idle_thread->Close();
        idle_thread = nullptr;
    }
 }

-KScheduler::~KScheduler() {
-    ASSERT(!idle_thread);
-}
-
 KThread* KScheduler::GetCurrentThread() const {
    if (auto result = current_thread.load(); result) {
        return result;
@@ -644,12 +642,10 @@ void KScheduler::RescheduleCurrentCore() {
    if (phys_core.IsInterrupted()) {
        phys_core.ClearInterrupt();
    }
-
    guard.Lock();
    if (state.needs_scheduling.load()) {
        Schedule();
    } else {
-        GetCurrentThread()->EnableDispatch();
        guard.Unlock();
    }
 }
@@ -659,33 +655,26 @@ void KScheduler::OnThreadStart() {
 }

 void KScheduler::Unload(KThread* thread) {
-    ASSERT(thread);
-
    LOG_TRACE(Kernel, "core {}, unload thread {}", core_id, thread ? thread->GetName() : "nullptr");

-    if (thread->IsCallingSvc()) {
-        thread->ClearIsCallingSvc();
+    if (thread) {
+        if (thread->IsCallingSvc()) {
+            thread->ClearIsCallingSvc();
+        }
+        if (!thread->IsTerminationRequested()) {
+            prev_thread = thread;
+
+            Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
+            cpu_core.SaveContext(thread->GetContext32());
+            cpu_core.SaveContext(thread->GetContext64());
+            // Save the TPIDR_EL0 system register in case it was modified.
+            thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0());
+            cpu_core.ClearExclusiveState();
+        } else {
+            prev_thread = nullptr;
+        }
+        thread->context_guard.Unlock();
    }
-
-    auto& physical_core = system.Kernel().PhysicalCore(core_id);
-    if (!physical_core.IsInitialized()) {
-        return;
-    }
-
-    Core::ARM_Interface& cpu_core = physical_core.ArmInterface();
-    cpu_core.SaveContext(thread->GetContext32());
-    cpu_core.SaveContext(thread->GetContext64());
-    // Save the TPIDR_EL0 system register in case it was modified.
-    thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0());
-    cpu_core.ClearExclusiveState();
-
-    if (!thread->IsTerminationRequested() && thread->GetActiveCore() == core_id) {
-        prev_thread = thread;
-    } else {
-        prev_thread = nullptr;
-    }
-
-    thread->context_guard.Unlock();
 }

 void KScheduler::Reload(KThread* thread) {
@@ -694,6 +683,11 @@ void KScheduler::Reload(KThread* thread) {
    if (thread) {
        ASSERT_MSG(thread->GetState() == ThreadState::Runnable, "Thread must be runnable.");

+        auto* const thread_owner_process = thread->GetOwnerProcess();
+        if (thread_owner_process != nullptr) {
+            system.Kernel().MakeCurrentProcess(thread_owner_process);
+        }
+
        Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
        cpu_core.LoadContext(thread->GetContext32());
        cpu_core.LoadContext(thread->GetContext64());
@@ -711,7 +705,7 @@ void KScheduler::SwitchContextStep2() {
 }

 void KScheduler::ScheduleImpl() {
-    KThread* previous_thread = GetCurrentThread();
+    KThread* previous_thread = current_thread.load();
    KThread* next_thread = state.highest_priority_thread;

    state.needs_scheduling = false;
@@ -723,15 +717,10 @@ void KScheduler::ScheduleImpl() {

    // If we're not actually switching thread, there's nothing to do.
    if (next_thread == current_thread.load()) {
-        previous_thread->EnableDispatch();
        guard.Unlock();
        return;
    }

-    if (next_thread->GetCurrentCore() != core_id) {
-        next_thread->SetCurrentCore(core_id);
-    }
-
    current_thread.store(next_thread);

    KProcess* const previous_process = system.Kernel().CurrentProcess();
@@ -742,7 +731,11 @@ void KScheduler::ScheduleImpl() {
    Unload(previous_thread);

    std::shared_ptr<Common::Fiber>* old_context;
-    old_context = &previous_thread->GetHostContext();
+    if (previous_thread != nullptr) {
+        old_context = &previous_thread->GetHostContext();
+    } else {
+        old_context = &idle_thread->GetHostContext();
+    }
    guard.Unlock();

    Common::Fiber::YieldTo(*old_context, *switch_fiber);
--- a/src/core/hle/kernel/k_scheduler.h
+++ b/src/core/hle/kernel/k_scheduler.h
@@ -33,8 +33,6 @@ public:
    explicit KScheduler(Core::System& system_, s32 core_id_);
    ~KScheduler();

-    void Finalize();
-
    /// Reschedules to the next available thread (call after current thread is suspended)
    void RescheduleCurrentCore();

--- a/src/core/hle/kernel/k_thread.cpp
+++ b/src/core/hle/kernel/k_thread.cpp
@@ -14,7 +14,6 @@
 #include "common/fiber.h"
 #include "common/logging/log.h"
 #include "common/scope_exit.h"
-#include "common/settings.h"
 #include "common/thread_queue_list.h"
 #include "core/core.h"
 #include "core/cpu_manager.h"
@@ -189,7 +188,7 @@ ResultCode KThread::Initialize(KThreadFunction func, uintptr_t arg, VAddr user_s
    // Setup the stack parameters.
    StackParameters& sp = GetStackParameters();
    sp.cur_thread = this;
-    sp.disable_count = 0;
+    sp.disable_count = 1;
    SetInExceptionHandler();

    // Set thread ID.
@@ -216,10 +215,9 @@ ResultCode KThread::InitializeThread(KThread* thread, KThreadFunction func, uint
    // Initialize the thread.
    R_TRY(thread->Initialize(func, arg, user_stack_top, prio, core, owner, type));

-    // Initialize emulation parameters.
+    // Initialize host context.
    thread->host_context =
        std::make_shared<Common::Fiber>(std::move(init_func), init_func_parameter);
-    thread->is_single_core = !Settings::values.use_multi_core.GetValue();

    return ResultSuccess;
 }
@@ -972,9 +970,6 @@ ResultCode KThread::Run() {

        // Set our state and finish.
        SetState(ThreadState::Runnable);
-
-        DisableDispatch();
-
        return ResultSuccess;
    }
 }
@@ -1059,16 +1054,4 @@ s32 GetCurrentCoreId(KernelCore& kernel) {
    return GetCurrentThread(kernel).GetCurrentCore();
 }

-KScopedDisableDispatch::~KScopedDisableDispatch() {
-    if (GetCurrentThread(kernel).GetDisableDispatchCount() <= 1) {
-        auto scheduler = kernel.CurrentScheduler();
-
-        if (scheduler) {
-            scheduler->RescheduleCurrentCore();
-        }
-    } else {
-        GetCurrentThread(kernel).EnableDispatch();
-    }
-}
-
 } // namespace Kernel
--- a/src/core/hle/kernel/k_thread.h
+++ b/src/core/hle/kernel/k_thread.h
@@ -450,39 +450,16 @@ public:
        sleeping_queue = q;
    }

-    [[nodiscard]] bool IsKernelThread() const {
-        return GetActiveCore() == 3;
-    }
-
-    [[nodiscard]] bool IsDispatchTrackingDisabled() const {
-        return is_single_core || IsKernelThread();
-    }
-
    [[nodiscard]] s32 GetDisableDispatchCount() const {
-        if (IsDispatchTrackingDisabled()) {
-            // TODO(bunnei): Until kernel threads are emulated, we cannot enable/disable dispatch.
-            return 1;
-        }
-
        return this->GetStackParameters().disable_count;
    }

    void DisableDispatch() {
-        if (IsDispatchTrackingDisabled()) {
-            // TODO(bunnei): Until kernel threads are emulated, we cannot enable/disable dispatch.
-            return;
-        }
-
        ASSERT(GetCurrentThread(kernel).GetDisableDispatchCount() >= 0);
        this->GetStackParameters().disable_count++;
    }

    void EnableDispatch() {
-        if (IsDispatchTrackingDisabled()) {
-            // TODO(bunnei): Until kernel threads are emulated, we cannot enable/disable dispatch.
-            return;
-        }
-
        ASSERT(GetCurrentThread(kernel).GetDisableDispatchCount() > 0);
        this->GetStackParameters().disable_count--;
    }
@@ -731,7 +708,6 @@ private:

    // For emulation
    std::shared_ptr<Common::Fiber> host_context{};
-    bool is_single_core{};

    // For debugging
    std::vector<KSynchronizationObject*> wait_objects_for_debugging;
@@ -776,16 +752,4 @@ public:
    }
 };

-class KScopedDisableDispatch {
-public:
-    [[nodiscard]] explicit KScopedDisableDispatch(KernelCore& kernel_) : kernel{kernel_} {
-        GetCurrentThread(kernel).DisableDispatch();
-    }
-
-    ~KScopedDisableDispatch();
-
-private:
-    KernelCore& kernel;
-};
-
 } // namespace Kernel
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -85,9 +85,8 @@ struct KernelCore::Impl {
    }

    void InitializeCores() {
-        for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
-            cores[core_id].Initialize(current_process->Is64BitProcess());
-            system.Memory().SetCurrentPageTable(*current_process, core_id);
+        for (auto& core : cores) {
+            core.Initialize(current_process->Is64BitProcess());
        }
    }

@@ -132,6 +131,15 @@ struct KernelCore::Impl {
        next_user_process_id = KProcess::ProcessIDMin;
        next_thread_id = 1;

+        for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
+            if (suspend_threads[core_id]) {
+                suspend_threads[core_id]->Close();
+                suspend_threads[core_id] = nullptr;
+            }
+
+            schedulers[core_id].reset();
+        }
+
        cores.clear();

        global_handle_table->Finalize();
@@ -159,16 +167,6 @@ struct KernelCore::Impl {
        CleanupObject(time_shared_mem);
        CleanupObject(system_resource_limit);

-        for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
-            if (suspend_threads[core_id]) {
-                suspend_threads[core_id]->Close();
-                suspend_threads[core_id] = nullptr;
-            }
-
-            schedulers[core_id]->Finalize();
-            schedulers[core_id].reset();
-        }
-
        // Next host thead ID to use, 0-3 IDs represent core threads, >3 represent others
        next_host_thread_id = Core::Hardware::NUM_CPU_CORES;

@@ -259,22 +257,33 @@ struct KernelCore::Impl {

    void MakeCurrentProcess(KProcess* process) {
        current_process = process;
-    }
+        if (process == nullptr) {
+            return;
+        }

-    /// Creates a new host thread ID, should only be called by GetHostThreadId
-    u32 AllocateHostThreadId(std::optional<std::size_t> core_id) {
-        if (core_id) {
-            // The first for slots are reserved for CPU core threads
-            ASSERT(*core_id < Core::Hardware::NUM_CPU_CORES);
-            return static_cast<u32>(*core_id);
-        } else {
-            return next_host_thread_id++;
+        const u32 core_id = GetCurrentHostThreadID();
+        if (core_id < Core::Hardware::NUM_CPU_CORES) {
+            system.Memory().SetCurrentPageTable(*process, core_id);
        }
    }

+    static inline thread_local u32 host_thread_id = UINT32_MAX;
+
    /// Gets the host thread ID for the caller, allocating a new one if this is the first time
-    u32 GetHostThreadId(std::optional<std::size_t> core_id = std::nullopt) {
-        const thread_local auto host_thread_id{AllocateHostThreadId(core_id)};
+    u32 GetHostThreadId(std::size_t core_id) {
+        if (host_thread_id == UINT32_MAX) {
+            // The first four slots are reserved for CPU core threads
+            ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
+            host_thread_id = static_cast<u32>(core_id);
+        }
+        return host_thread_id;
+    }
+
+    /// Gets the host thread ID for the caller, allocating the next free non-core ID on first use
+    u32 GetHostThreadId() {
+        if (host_thread_id == UINT32_MAX) {
+            host_thread_id = next_host_thread_id++;
+        }
        return host_thread_id;
    }

@@ -818,20 +827,16 @@ const Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) const {
    return impl->cores[id];
 }

-size_t KernelCore::CurrentPhysicalCoreIndex() const {
-    const u32 core_id = impl->GetCurrentHostThreadID();
-    if (core_id >= Core::Hardware::NUM_CPU_CORES) {
-        return Core::Hardware::NUM_CPU_CORES - 1;
-    }
-    return core_id;
-}
-
 Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() {
-    return impl->cores[CurrentPhysicalCoreIndex()];
+    u32 core_id = impl->GetCurrentHostThreadID();
+    ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
+    return impl->cores[core_id];
 }

 const Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() const {
-    return impl->cores[CurrentPhysicalCoreIndex()];
+    u32 core_id = impl->GetCurrentHostThreadID();
+    ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
+    return impl->cores[core_id];
 }

 Kernel::KScheduler* KernelCore::CurrentScheduler() {
@@ -1024,9 +1029,6 @@ void KernelCore::Suspend(bool in_suspention) {
            impl->suspend_threads[core_id]->SetState(state);
            impl->suspend_threads[core_id]->SetWaitReasonForDebugging(
                ThreadWaitReasonForDebugging::Suspended);
-            if (!should_suspend) {
-                impl->suspend_threads[core_id]->DisableDispatch();
-            }
        }
    }
 }
@@ -1041,11 +1043,16 @@ void KernelCore::ExceptionalExit() {
 }

 void KernelCore::EnterSVCProfile() {
-    impl->svc_ticks[CurrentPhysicalCoreIndex()] = MicroProfileEnter(MICROPROFILE_TOKEN(Kernel_SVC));
+    const std::size_t core = impl->GetCurrentHostThreadID();
+    // NOTE(review): svc_ticks is assumed to hold one slot per CPU core; same bound as CurrentPhysicalCore().
+    ASSERT(core < Core::Hardware::NUM_CPU_CORES);
+    impl->svc_ticks[core] = MicroProfileEnter(MICROPROFILE_TOKEN(Kernel_SVC));
 }

 void KernelCore::ExitSVCProfile() {
-    MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[CurrentPhysicalCoreIndex()]);
+    const std::size_t core = impl->GetCurrentHostThreadID();
+    ASSERT(core < Core::Hardware::NUM_CPU_CORES);
+    MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[core]);
 }

 std::weak_ptr<Kernel::ServiceThread> KernelCore::CreateServiceThread(const std::string& name) {
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -146,9 +146,6 @@ public:
    /// Gets the an instance of the respective physical CPU core.
    const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const;

-    /// Gets the current physical core index for the running host thread.
-    std::size_t CurrentPhysicalCoreIndex() const;
-
    /// Gets the sole instance of the Scheduler at the current running core.
    Kernel::KScheduler* CurrentScheduler();

--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -877,7 +877,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, Handle
            const u64 thread_ticks = current_thread->GetCpuTime();

            out_ticks = thread_ticks + (core_timing.GetCPUTicks() - prev_ctx_ticks);
-        } else if (same_thread && info_sub_id == system.Kernel().CurrentPhysicalCoreIndex()) {
+        } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) {
            out_ticks = core_timing.GetCPUTicks() - prev_ctx_ticks;
        }

@@ -1078,8 +1078,8 @@ static ResultCode GetThreadContext(Core::System& system, VAddr out_context, Hand
            for (auto i = 0; i < static_cast<s32>(Core::Hardware::NUM_CPU_CORES); ++i) {
                if (thread.GetPointerUnsafe() == kernel.Scheduler(i).GetCurrentThread()) {
                    current = true;
+                    break;
                }
-                break;
            }

            // If the thread is current, retry until it isn't.
--- a/src/core/hle/service/am/applets/applet_error.cpp
+++ b/src/core/hle/service/am/applets/applet_error.cpp
@@ -16,6 +16,30 @@

 namespace Service::AM::Applets {

+struct ErrorCode {
+    u32 error_category{};
+    u32 error_number{};
+
+    static constexpr ErrorCode FromU64(u64 error_code) {
+        return {
+            .error_category{static_cast<u32>(error_code >> 32)},
+            .error_number{static_cast<u32>(error_code & 0xFFFFFFFF)},
+        };
+    }
+
+    static constexpr ErrorCode FromResultCode(ResultCode result) {
+        return {
+            .error_category{2000 + static_cast<u32>(result.module.Value())},
+            .error_number{result.description.Value()},
+        };
+    }
+
+    constexpr ResultCode ToResultCode() const {
+        return ResultCode{static_cast<ErrorModule>(error_category - 2000), error_number};
+    }
+};
+static_assert(sizeof(ErrorCode) == 0x8, "ErrorCode has incorrect size.");
+
 #pragma pack(push, 4)
 struct ShowError {
    u8 mode;
@@ -76,12 +100,7 @@ void CopyArgumentData(const std::vector<u8>& data, T& variable) {
 }

 ResultCode Decode64BitError(u64 error) {
-    const auto description = (error >> 32) & 0x1FFF;
-    auto module = error & 0x3FF;
-    if (module >= 2000)
-        module -= 2000;
-    module &= 0x1FF;
-    return {static_cast<ErrorModule>(module), static_cast<u32>(description)};
+    return ErrorCode::FromU64(error).ToResultCode();
 }

 } // Anonymous namespace
--- a/src/core/hle/service/ngct/ngct.cpp
+++ b/src/core/hle/service/ngct/ngct.cpp
@@ -0,0 +1,50 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included
+
+#include "common/string_util.h"
+#include "core/core.h"
+#include "core/hle/ipc_helpers.h"
+#include "core/hle/service/ngct/ngct.h"
+#include "core/hle/service/service.h"
+
+namespace Service::NGCT {
+
+/// Service object registered under the name "ngct:u".
+class IService final : public ServiceFramework<IService> {
+public:
+    explicit IService(Core::System& system_) : ServiceFramework{system_, "ngct:u"} {
+        // clang-format off
+        static const FunctionInfo functions[] = {
+            {0, nullptr, "Match"},
+            {1, &IService::Filter, "Filter"},
+        };
+        // clang-format on
+
+        RegisterHandlers(functions);
+    }
+
+private:
+    /// Stubbed handler for command 1 ("Filter"): logs the input text and writes the input
+    /// buffer back unchanged.
+    void Filter(Kernel::HLERequestContext& ctx) {
+        const auto buffer = ctx.ReadBuffer();
+        const auto text = Common::StringFromFixedZeroTerminatedBuffer(
+            reinterpret_cast<const char*>(buffer.data()), buffer.size());
+
+        LOG_WARNING(Service_NGCT, "(STUBBED) called, text={}", text);
+
+        // Return the same string since we don't censor anything
+        ctx.WriteBuffer(buffer);
+
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(ResultSuccess);
+    }
+};
+
+void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system) {
+    // Install on the manager supplied by the caller instead of re-fetching it from system.
+    std::make_shared<IService>(system)->InstallAsService(service_manager);
+}
+
+} // namespace Service::NGCT
--- a/src/core/hle/service/ngct/ngct.h
+++ b/src/core/hle/service/ngct/ngct.h
@@ -0,0 +1,20 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included
+
+#pragma once
+
+namespace Core {
+class System;
+}
+
+namespace Service::SM {
+class ServiceManager;
+}
+
+namespace Service::NGCT {
+
+/// Registers all NGCT services with the specified service manager.
+void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system);
+
+} // namespace Service::NGCT
--- a/src/core/hle/service/nifm/nifm.cpp
+++ b/src/core/hle/service/nifm/nifm.cpp
@@ -11,6 +11,7 @@
 #include "core/hle/service/nifm/nifm.h"
 #include "core/hle/service/service.h"
 #include "core/network/network.h"
+#include "core/network/network_interface.h"

 namespace Service::NIFM {

@@ -179,10 +180,10 @@ private:
        IPC::ResponseBuilder rb{ctx, 3};
        rb.Push(ResultSuccess);

-        if (Settings::values.bcat_backend.GetValue() == "none") {
-            rb.PushEnum(RequestState::NotSubmitted);
-        } else {
+        if (Network::GetHostIPv4Address().has_value()) {
            rb.PushEnum(RequestState::Connected);
+        } else {
+            rb.PushEnum(RequestState::NotSubmitted);
        }
    }

@@ -276,37 +277,45 @@ private:
    void GetCurrentNetworkProfile(Kernel::HLERequestContext& ctx) {
        LOG_WARNING(Service_NIFM, "(STUBBED) called");

-        const SfNetworkProfileData network_profile_data{
-            .ip_setting_data{
-                .ip_address_setting{
-                    .is_automatic{true},
-                    .current_address{192, 168, 1, 100},
-                    .subnet_mask{255, 255, 255, 0},
-                    .gateway{192, 168, 1, 1},
+        const auto net_iface = Network::GetSelectedNetworkInterface();
+
+        const SfNetworkProfileData network_profile_data = [&net_iface] {
+            if (!net_iface) {
+                return SfNetworkProfileData{};
+            }
+
+            return SfNetworkProfileData{
+                .ip_setting_data{
+                    .ip_address_setting{
+                        .is_automatic{true},
+                        .current_address{Network::TranslateIPv4(net_iface->ip_address)},
+                        .subnet_mask{Network::TranslateIPv4(net_iface->subnet_mask)},
+                        .gateway{Network::TranslateIPv4(net_iface->gateway)},
+                    },
+                    .dns_setting{
+                        .is_automatic{true},
+                        .primary_dns{1, 1, 1, 1},
+                        .secondary_dns{1, 0, 0, 1},
+                    },
+                    .proxy_setting{
+                        .enabled{false},
+                        .port{},
+                        .proxy_server{},
+                        .automatic_auth_enabled{},
+                        .user{},
+                        .password{},
+                    },
+                    .mtu{1500},
                },
-                .dns_setting{
-                    .is_automatic{true},
-                    .primary_dns{1, 1, 1, 1},
-                    .secondary_dns{1, 0, 0, 1},
+                .uuid{0xdeadbeef, 0xdeadbeef},
+                .network_name{"yuzu Network"},
+                .wireless_setting_data{
+                    .ssid_length{12},
+                    .ssid{"yuzu Network"},
+                    .passphrase{"yuzupassword"},
                },
-                .proxy_setting{
-                    .enabled{false},
-                    .port{},
-                    .proxy_server{},
-                    .automatic_auth_enabled{},
-                    .user{},
-                    .password{},
-                },
-                .mtu{1500},
-            },
-            .uuid{0xdeadbeef, 0xdeadbeef},
-            .network_name{"yuzu Network"},
-            .wireless_setting_data{
-                .ssid_length{12},
-                .ssid{"yuzu Network"},
-                .passphrase{"yuzupassword"},
-            },
-        };
+            };
+        }();

        ctx.WriteBuffer(network_profile_data);

@@ -322,12 +331,15 @@ private:
    void GetCurrentIpAddress(Kernel::HLERequestContext& ctx) {
        LOG_WARNING(Service_NIFM, "(STUBBED) called");

-        const auto [ipv4, error] = Network::GetHostIPv4Address();
-        UNIMPLEMENTED_IF(error != Network::Errno::SUCCESS);
+        auto ipv4 = Network::GetHostIPv4Address();
+        if (!ipv4) {
+            LOG_ERROR(Service_NIFM, "Couldn't get host IPv4 address, defaulting to 0.0.0.0");
+            ipv4.emplace(Network::IPv4Address{0, 0, 0, 0});
+        }

        IPC::ResponseBuilder rb{ctx, 3};
        rb.Push(ResultSuccess);
-        rb.PushRaw(ipv4);
+        rb.PushRaw(*ipv4);
    }
    void CreateTemporaryNetworkProfile(Kernel::HLERequestContext& ctx) {
        LOG_DEBUG(Service_NIFM, "called");
@@ -348,25 +360,33 @@ private:
        LOG_WARNING(Service_NIFM, "(STUBBED) called");

        struct IpConfigInfo {
-            IpAddressSetting ip_address_setting;
-            DnsSetting dns_setting;
+            IpAddressSetting ip_address_setting{};
+            DnsSetting dns_setting{};
        };
        static_assert(sizeof(IpConfigInfo) == sizeof(IpAddressSetting) + sizeof(DnsSetting),
                      "IpConfigInfo has incorrect size.");

-        const IpConfigInfo ip_config_info{
-            .ip_address_setting{
-                .is_automatic{true},
-                .current_address{192, 168, 1, 100},
-                .subnet_mask{255, 255, 255, 0},
-                .gateway{192, 168, 1, 1},
-            },
-            .dns_setting{
-                .is_automatic{true},
-                .primary_dns{1, 1, 1, 1},
-                .secondary_dns{1, 0, 0, 1},
-            },
-        };
+        const auto net_iface = Network::GetSelectedNetworkInterface();
+
+        const IpConfigInfo ip_config_info = [&net_iface] {
+            if (!net_iface) {
+                return IpConfigInfo{};
+            }
+
+            return IpConfigInfo{
+                .ip_address_setting{
+                    .is_automatic{true},
+                    .current_address{Network::TranslateIPv4(net_iface->ip_address)},
+                    .subnet_mask{Network::TranslateIPv4(net_iface->subnet_mask)},
+                    .gateway{Network::TranslateIPv4(net_iface->gateway)},
+                },
+                .dns_setting{
+                    .is_automatic{true},
+                    .primary_dns{1, 1, 1, 1},
+                    .secondary_dns{1, 0, 0, 1},
+                },
+            };
+        }();

        IPC::ResponseBuilder rb{ctx, 2 + (sizeof(IpConfigInfo) + 3) / sizeof(u32)};
        rb.Push(ResultSuccess);
@@ -384,10 +404,10 @@ private:

        IPC::ResponseBuilder rb{ctx, 3};
        rb.Push(ResultSuccess);
-        if (Settings::values.bcat_backend.GetValue() == "none") {
-            rb.Push<u8>(0);
-        } else {
+        if (Network::GetHostIPv4Address().has_value()) {
            rb.Push<u8>(1);
+        } else {
+            rb.Push<u8>(0);
        }
    }
    void IsAnyInternetRequestAccepted(Kernel::HLERequestContext& ctx) {
@@ -395,10 +415,10 @@ private:

        IPC::ResponseBuilder rb{ctx, 3};
        rb.Push(ResultSuccess);
-        if (Settings::values.bcat_backend.GetValue() == "none") {
-            rb.Push<u8>(0);
-        } else {
+        if (Network::GetHostIPv4Address().has_value()) {
            rb.Push<u8>(1);
+        } else {
+            rb.Push<u8>(0);
        }
    }
 };
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -42,15 +42,14 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {}
 void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
                        u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
                        const Common::Rectangle<int>& crop_rect) {
-    VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
+    const VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
    LOG_TRACE(Service,
              "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
              addr, offset, width, height, stride, format);

-    using PixelFormat = Tegra::FramebufferConfig::PixelFormat;
-    const Tegra::FramebufferConfig framebuffer{
-        addr,      offset,   width, height, stride, static_cast<PixelFormat>(format),
-        transform, crop_rect};
+    const auto pixel_format = static_cast<Tegra::FramebufferConfig::PixelFormat>(format);
+    const Tegra::FramebufferConfig framebuffer{addr,   offset,       width,     height,
+                                               stride, pixel_format, transform, crop_rect};

    system.GetPerfStats().EndSystemFrame();
    system.GPU().SwapBuffers(&framebuffer);
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -42,7 +42,9 @@ struct IGBPBuffer {
    u32_le index;
    INSERT_PADDING_WORDS(3);
    u32_le gpu_buffer_id;
-    INSERT_PADDING_WORDS(17);
+    INSERT_PADDING_WORDS(6);
+    u32_le external_format;
+    INSERT_PADDING_WORDS(10);
    u32_le nvmap_handle;
    u32_le offset;
    INSERT_PADDING_WORDS(60);
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -298,7 +298,7 @@ void NVFlinger::Compose() {
        auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>("/dev/nvdisp_disp0");
        ASSERT(nvdisp);

-        nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.format,
+        nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.external_format,
                     igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
                     buffer->get().transform, buffer->get().crop_rect);

--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -46,6 +46,7 @@
 #include "core/hle/service/ncm/ncm.h"
 #include "core/hle/service/nfc/nfc.h"
 #include "core/hle/service/nfp/nfp.h"
+#include "core/hle/service/ngct/ngct.h"
 #include "core/hle/service/nifm/nifm.h"
 #include "core/hle/service/nim/nim.h"
 #include "core/hle/service/npns/npns.h"
@@ -271,6 +272,7 @@ Services::Services(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system
    NCM::InstallInterfaces(*sm, system);
    NFC::InstallInterfaces(*sm, system);
    NFP::InstallInterfaces(*sm, system);
+    NGCT::InstallInterfaces(*sm, system);
    NIFM::InstallInterfaces(*sm, system);
    NIM::InstallInterfaces(*sm, system);
    NPNS::InstallInterfaces(*sm, system);
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -1158,7 +1158,7 @@ private:

        const auto layer_id = nv_flinger.CreateLayer(display_id);
        if (!layer_id) {
-            LOG_ERROR(Service_VI, "Layer not found! layer_id={}", *layer_id);
+            LOG_ERROR(Service_VI, "Layer not found! display_id={}", display_id);
            IPC::ResponseBuilder rb{ctx, 2};
            rb.Push(ERR_NOT_FOUND);
            return;
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -4,8 +4,6 @@

 #include <algorithm>
 #include <cstring>
-#include <optional>
-#include <utility>

 #include "common/assert.h"
 #include "common/atomic_ops.h"
@@ -14,12 +12,10 @@
 #include "common/page_table.h"
 #include "common/settings.h"
 #include "common/swap.h"
-#include "core/arm/arm_interface.h"
 #include "core/core.h"
 #include "core/device_memory.h"
 #include "core/hle/kernel/k_page_table.h"
 #include "core/hle/kernel/k_process.h"
-#include "core/hle/kernel/physical_memory.h"
 #include "core/memory.h"
 #include "video_core/gpu.h"

@@ -62,17 +58,7 @@ struct Memory::Impl {
        }
    }

-    bool IsValidVirtualAddress(const Kernel::KProcess& process, const VAddr vaddr) const {
-        const auto& page_table = process.PageTable().PageTableImpl();
-        const auto [pointer, type] = page_table.pointers[vaddr >> PAGE_BITS].PointerType();
-        return pointer != nullptr || type == Common::PageType::RasterizerCachedMemory;
-    }
-
-    bool IsValidVirtualAddress(VAddr vaddr) const {
-        return IsValidVirtualAddress(*system.CurrentProcess(), vaddr);
-    }
-
-    u8* GetPointerFromRasterizerCachedMemory(VAddr vaddr) const {
+    [[nodiscard]] u8* GetPointerFromRasterizerCachedMemory(VAddr vaddr) const {
        const PAddr paddr{current_page_table->backing_addr[vaddr >> PAGE_BITS]};

        if (!paddr) {
@@ -82,18 +68,6 @@ struct Memory::Impl {
        return system.DeviceMemory().GetPointer(paddr) + vaddr;
    }

-    u8* GetPointer(const VAddr vaddr) const {
-        const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
-        if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
-            return pointer + vaddr;
-        }
-        const auto type = Common::PageTable::PageInfo::ExtractType(raw_pointer);
-        if (type == Common::PageType::RasterizerCachedMemory) {
-            return GetPointerFromRasterizerCachedMemory(vaddr);
-        }
-        return nullptr;
-    }
-
    u8 Read8(const VAddr addr) {
        return Read<u8>(addr);
    }
@@ -179,7 +153,7 @@ struct Memory::Impl {
        std::string string;
        string.reserve(max_length);
        for (std::size_t i = 0; i < max_length; ++i) {
-            const char c = Read8(vaddr);
+            const char c = Read<s8>(vaddr);
            if (c == '\0') {
                break;
            }
@@ -190,15 +164,14 @@ struct Memory::Impl {
        return string;
    }

-    void ReadBlock(const Kernel::KProcess& process, const VAddr src_addr, void* dest_buffer,
-                   const std::size_t size) {
+    void WalkBlock(const Kernel::KProcess& process, const VAddr addr, const std::size_t size,
+                   auto on_unmapped, auto on_memory, auto on_rasterizer, auto increment) {
        const auto& page_table = process.PageTable().PageTableImpl();
-
        std::size_t remaining_size = size;
-        std::size_t page_index = src_addr >> PAGE_BITS;
-        std::size_t page_offset = src_addr & PAGE_MASK;
+        std::size_t page_index = addr >> PAGE_BITS;
+        std::size_t page_offset = addr & PAGE_MASK;

-        while (remaining_size > 0) {
+        while (remaining_size) {
            const std::size_t copy_amount =
                std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
            const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
@@ -206,22 +179,18 @@ struct Memory::Impl {
            const auto [pointer, type] = page_table.pointers[page_index].PointerType();
            switch (type) {
            case Common::PageType::Unmapped: {
-                LOG_ERROR(HW_Memory,
-                          "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
-                          current_vaddr, src_addr, size);
-                std::memset(dest_buffer, 0, copy_amount);
+                on_unmapped(copy_amount, current_vaddr);
                break;
            }
            case Common::PageType::Memory: {
                DEBUG_ASSERT(pointer);
-                const u8* const src_ptr = pointer + page_offset + (page_index << PAGE_BITS);
-                std::memcpy(dest_buffer, src_ptr, copy_amount);
+                u8* mem_ptr = pointer + page_offset + (page_index << PAGE_BITS);
+                on_memory(copy_amount, mem_ptr);
                break;
            }
            case Common::PageType::RasterizerCachedMemory: {
-                const u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
-                system.GPU().FlushRegion(current_vaddr, copy_amount);
-                std::memcpy(dest_buffer, host_ptr, copy_amount);
+                u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
+                on_rasterizer(current_vaddr, copy_amount, host_ptr);
                break;
            }
            default:
@@ -230,248 +199,122 @@ struct Memory::Impl {

            page_index++;
            page_offset = 0;
-            dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+            increment(copy_amount);
            remaining_size -= copy_amount;
        }
    }

-    void ReadBlockUnsafe(const Kernel::KProcess& process, const VAddr src_addr, void* dest_buffer,
-                         const std::size_t size) {
-        const auto& page_table = process.PageTable().PageTableImpl();
-
-        std::size_t remaining_size = size;
-        std::size_t page_index = src_addr >> PAGE_BITS;
-        std::size_t page_offset = src_addr & PAGE_MASK;
-
-        while (remaining_size > 0) {
-            const std::size_t copy_amount =
-                std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
-            const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
-
-            const auto [pointer, type] = page_table.pointers[page_index].PointerType();
-            switch (type) {
-            case Common::PageType::Unmapped: {
+    template <bool UNSAFE>
+    void ReadBlockImpl(const Kernel::KProcess& process, const VAddr src_addr, void* dest_buffer,
+                       const std::size_t size) {
+        WalkBlock(
+            process, src_addr, size,
+            [src_addr, size, &dest_buffer](const std::size_t copy_amount,
+                                           const VAddr current_vaddr) {
                LOG_ERROR(HW_Memory,
                          "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                          current_vaddr, src_addr, size);
                std::memset(dest_buffer, 0, copy_amount);
-                break;
-            }
-            case Common::PageType::Memory: {
-                DEBUG_ASSERT(pointer);
-                const u8* const src_ptr = pointer + page_offset + (page_index << PAGE_BITS);
+            },
+            [&dest_buffer](const std::size_t copy_amount, const u8* const src_ptr) {
                std::memcpy(dest_buffer, src_ptr, copy_amount);
-                break;
-            }
-            case Common::PageType::RasterizerCachedMemory: {
-                const u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
+            },
+            [&system = system, &dest_buffer](const VAddr current_vaddr,
+                                             const std::size_t copy_amount,
+                                             const u8* const host_ptr) {
+                if constexpr (!UNSAFE) {
+                    system.GPU().FlushRegion(current_vaddr, copy_amount);
+                }
                std::memcpy(dest_buffer, host_ptr, copy_amount);
-                break;
-            }
-            default:
-                UNREACHABLE();
-            }
-
-            page_index++;
-            page_offset = 0;
-            dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
-            remaining_size -= copy_amount;
-        }
+            },
+            [&dest_buffer](const std::size_t copy_amount) {
+                dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+            });
    }

    void ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
-        ReadBlock(*system.CurrentProcess(), src_addr, dest_buffer, size);
+        ReadBlockImpl<false>(*system.CurrentProcess(), src_addr, dest_buffer, size);
    }

    void ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
-        ReadBlockUnsafe(*system.CurrentProcess(), src_addr, dest_buffer, size);
+        ReadBlockImpl<true>(*system.CurrentProcess(), src_addr, dest_buffer, size);
    }

-    void WriteBlock(const Kernel::KProcess& process, const VAddr dest_addr, const void* src_buffer,
-                    const std::size_t size) {
-        const auto& page_table = process.PageTable().PageTableImpl();
-        std::size_t remaining_size = size;
-        std::size_t page_index = dest_addr >> PAGE_BITS;
-        std::size_t page_offset = dest_addr & PAGE_MASK;
-
-        while (remaining_size > 0) {
-            const std::size_t copy_amount =
-                std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
-            const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
-
-            const auto [pointer, type] = page_table.pointers[page_index].PointerType();
-            switch (type) {
-            case Common::PageType::Unmapped: {
+    template <bool UNSAFE>
+    void WriteBlockImpl(const Kernel::KProcess& process, const VAddr dest_addr,
+                        const void* src_buffer, const std::size_t size) {
+        WalkBlock(
+            process, dest_addr, size,
+            [dest_addr, size](const std::size_t copy_amount, const VAddr current_vaddr) {
                LOG_ERROR(HW_Memory,
                          "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                          current_vaddr, dest_addr, size);
-                break;
-            }
-            case Common::PageType::Memory: {
-                DEBUG_ASSERT(pointer);
-                u8* const dest_ptr = pointer + page_offset + (page_index << PAGE_BITS);
+            },
+            [&src_buffer](const std::size_t copy_amount, u8* const dest_ptr) {
                std::memcpy(dest_ptr, src_buffer, copy_amount);
-                break;
-            }
-            case Common::PageType::RasterizerCachedMemory: {
-                u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
-                system.GPU().InvalidateRegion(current_vaddr, copy_amount);
+            },
+            [&system = system, &src_buffer](const VAddr current_vaddr,
+                                            const std::size_t copy_amount, u8* const host_ptr) {
+                if constexpr (!UNSAFE) {
+                    system.GPU().InvalidateRegion(current_vaddr, copy_amount);
+                }
                std::memcpy(host_ptr, src_buffer, copy_amount);
-                break;
-            }
-            default:
-                UNREACHABLE();
-            }
-
-            page_index++;
-            page_offset = 0;
-            src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
-            remaining_size -= copy_amount;
-        }
-    }
-
-    void WriteBlockUnsafe(const Kernel::KProcess& process, const VAddr dest_addr,
-                          const void* src_buffer, const std::size_t size) {
-        const auto& page_table = process.PageTable().PageTableImpl();
-        std::size_t remaining_size = size;
-        std::size_t page_index = dest_addr >> PAGE_BITS;
-        std::size_t page_offset = dest_addr & PAGE_MASK;
-
-        while (remaining_size > 0) {
-            const std::size_t copy_amount =
-                std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
-            const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
-
-            const auto [pointer, type] = page_table.pointers[page_index].PointerType();
-            switch (type) {
-            case Common::PageType::Unmapped: {
-                LOG_ERROR(HW_Memory,
-                          "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
-                          current_vaddr, dest_addr, size);
-                break;
-            }
-            case Common::PageType::Memory: {
-                DEBUG_ASSERT(pointer);
-                u8* const dest_ptr = pointer + page_offset + (page_index << PAGE_BITS);
-                std::memcpy(dest_ptr, src_buffer, copy_amount);
-                break;
-            }
-            case Common::PageType::RasterizerCachedMemory: {
-                u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
-                std::memcpy(host_ptr, src_buffer, copy_amount);
-                break;
-            }
-            default:
-                UNREACHABLE();
-            }
-
-            page_index++;
-            page_offset = 0;
-            src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
-            remaining_size -= copy_amount;
-        }
+            },
+            [&src_buffer](const std::size_t copy_amount) {
+                src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
+            });
    }

    void WriteBlock(const VAddr dest_addr, const void* src_buffer, const std::size_t size) {
-        WriteBlock(*system.CurrentProcess(), dest_addr, src_buffer, size);
+        WriteBlockImpl<false>(*system.CurrentProcess(), dest_addr, src_buffer, size);
    }

    void WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer, const std::size_t size) {
-        WriteBlockUnsafe(*system.CurrentProcess(), dest_addr, src_buffer, size);
+        WriteBlockImpl<true>(*system.CurrentProcess(), dest_addr, src_buffer, size);
    }

    void ZeroBlock(const Kernel::KProcess& process, const VAddr dest_addr, const std::size_t size) {
-        const auto& page_table = process.PageTable().PageTableImpl();
-        std::size_t remaining_size = size;
-        std::size_t page_index = dest_addr >> PAGE_BITS;
-        std::size_t page_offset = dest_addr & PAGE_MASK;
-
-        while (remaining_size > 0) {
-            const std::size_t copy_amount =
-                std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
-            const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
-
-            const auto [pointer, type] = page_table.pointers[page_index].PointerType();
-            switch (type) {
-            case Common::PageType::Unmapped: {
+        WalkBlock(
+            process, dest_addr, size,
+            [dest_addr, size](const std::size_t copy_amount, const VAddr current_vaddr) {
                LOG_ERROR(HW_Memory,
                          "Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                          current_vaddr, dest_addr, size);
-                break;
-            }
-            case Common::PageType::Memory: {
-                DEBUG_ASSERT(pointer);
-                u8* const dest_ptr = pointer + page_offset + (page_index << PAGE_BITS);
+            },
+            [](const std::size_t copy_amount, u8* const dest_ptr) {
                std::memset(dest_ptr, 0, copy_amount);
-                break;
-            }
-            case Common::PageType::RasterizerCachedMemory: {
-                u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
+            },
+            [&system = system](const VAddr current_vaddr, const std::size_t copy_amount,
+                               u8* const host_ptr) {
                system.GPU().InvalidateRegion(current_vaddr, copy_amount);
                std::memset(host_ptr, 0, copy_amount);
-                break;
-            }
-            default:
-                UNREACHABLE();
-            }
-
-            page_index++;
-            page_offset = 0;
-            remaining_size -= copy_amount;
-        }
-    }
-
-    void ZeroBlock(const VAddr dest_addr, const std::size_t size) {
-        ZeroBlock(*system.CurrentProcess(), dest_addr, size);
+            },
+            [](const std::size_t copy_amount) {});
    }

    void CopyBlock(const Kernel::KProcess& process, VAddr dest_addr, VAddr src_addr,
                   const std::size_t size) {
-        const auto& page_table = process.PageTable().PageTableImpl();
-        std::size_t remaining_size = size;
-        std::size_t page_index = src_addr >> PAGE_BITS;
-        std::size_t page_offset = src_addr & PAGE_MASK;
-
-        while (remaining_size > 0) {
-            const std::size_t copy_amount =
-                std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
-            const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
-
-            const auto [pointer, type] = page_table.pointers[page_index].PointerType();
-            switch (type) {
-            case Common::PageType::Unmapped: {
+        WalkBlock(
+            process, src_addr, size, // Iterate the source range; callbacks write to dest_addr
+            [this, &process, &dest_addr, &src_addr, size](const std::size_t copy_amount,
+                                                          const VAddr current_vaddr) {
                LOG_ERROR(HW_Memory,
                          "Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                          current_vaddr, src_addr, size);
                ZeroBlock(process, dest_addr, copy_amount);
-                break;
-            }
-            case Common::PageType::Memory: {
-                DEBUG_ASSERT(pointer);
-                const u8* src_ptr = pointer + page_offset + (page_index << PAGE_BITS);
-                WriteBlock(process, dest_addr, src_ptr, copy_amount);
-                break;
-            }
-            case Common::PageType::RasterizerCachedMemory: {
-                const u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
+            },
+            [this, &process, &dest_addr](const std::size_t copy_amount, const u8* const src_ptr) {
+                WriteBlockImpl<false>(process, dest_addr, src_ptr, copy_amount);
+            },
+            [this, &system = system, &process, &dest_addr](
+                const VAddr current_vaddr, const std::size_t copy_amount, u8* const host_ptr) {
                system.GPU().FlushRegion(current_vaddr, copy_amount);
-                WriteBlock(process, dest_addr, host_ptr, copy_amount);
-                break;
-            }
-            default:
-                UNREACHABLE();
-            }
-
-            page_index++;
-            page_offset = 0;
-            dest_addr += static_cast<VAddr>(copy_amount);
-            src_addr += static_cast<VAddr>(copy_amount);
-            remaining_size -= copy_amount;
-        }
-    }
-
-    void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size) {
-        return CopyBlock(*system.CurrentProcess(), dest_addr, src_addr, size);
+                WriteBlockImpl<false>(process, dest_addr, host_ptr, copy_amount);
+            },
+            [&dest_addr, &src_addr](const std::size_t copy_amount) {
+                dest_addr += static_cast<VAddr>(copy_amount);
+                src_addr += static_cast<VAddr>(copy_amount);
+            });
    }

    void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
@@ -514,7 +357,7 @@ struct Memory::Impl {
            } else {
                // Switch page type to uncached if now uncached
                switch (page_type) {
-                case Common::PageType::Unmapped:
+                case Common::PageType::Unmapped: // NOLINT(bugprone-branch-clone)
                    // It is not necessary for a process to have this region mapped into its address
                    // space, for example, a system module need not have a VRAM mapping.
                    break;
@@ -597,6 +440,44 @@ struct Memory::Impl {
        }
    }

+    [[nodiscard]] u8* GetPointerImpl(VAddr vaddr, auto on_unmapped, auto on_rasterizer) const {
+        // AARCH64 masks the upper 16 bit of all memory accesses
+        vaddr &= 0xffffffffffffLL;
+
+        if (vaddr >= 1uLL << current_page_table->GetAddressSpaceBits()) {
+            on_unmapped();
+            return nullptr;
+        }
+
+        // Avoid adding any extra logic to this fast-path block
+        const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
+        if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
+            return &pointer[vaddr];
+        }
+        switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
+        case Common::PageType::Unmapped:
+            on_unmapped();
+            return nullptr;
+        case Common::PageType::Memory:
+            ASSERT_MSG(false, "Mapped memory page without a pointer @ 0x{:016X}", vaddr);
+            return nullptr;
+        case Common::PageType::RasterizerCachedMemory: {
+            u8* const host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
+            on_rasterizer();
+            return host_ptr;
+        }
+        default:
+            UNREACHABLE();
+        }
+        return nullptr;
+    }
+
+    [[nodiscard]] u8* GetPointer(const VAddr vaddr) const {
+        return GetPointerImpl(
+            vaddr, [vaddr]() { LOG_ERROR(HW_Memory, "Unmapped GetPointer @ 0x{:016X}", vaddr); },
+            []() {});
+    }
+
    /**
     * Reads a particular data type out of memory at the given virtual address.
     *
@@ -610,39 +491,17 @@ struct Memory::Impl {
     */
    template <typename T>
    T Read(VAddr vaddr) {
-        // AARCH64 masks the upper 16 bit of all memory accesses
-        vaddr &= 0xffffffffffffLL;
-
-        if (vaddr >= 1uLL << current_page_table->GetAddressSpaceBits()) {
-            LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr);
-            return 0;
+        T result = 0;
+        const u8* const ptr = GetPointerImpl(
+            vaddr,
+            [vaddr]() {
+                LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, vaddr);
+            },
+            [&system = system, vaddr]() { system.GPU().FlushRegion(vaddr, sizeof(T)); });
+        if (ptr) {
+            std::memcpy(&result, ptr, sizeof(T));
        }
-
-        // Avoid adding any extra logic to this fast-path block
-        const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
-        if (const u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
-            T value;
-            std::memcpy(&value, &pointer[vaddr], sizeof(T));
-            return value;
-        }
-        switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
-        case Common::PageType::Unmapped:
-            LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr);
-            return 0;
-        case Common::PageType::Memory:
-            ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
-            break;
-        case Common::PageType::RasterizerCachedMemory: {
-            const u8* const host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
-            system.GPU().FlushRegion(vaddr, sizeof(T));
-            T value;
-            std::memcpy(&value, host_ptr, sizeof(T));
-            return value;
-        }
-        default:
-            UNREACHABLE();
-        }
-        return {};
+        return result;
    }

    /**
@@ -656,110 +515,46 @@ struct Memory::Impl {
     */
    template <typename T>
    void Write(VAddr vaddr, const T data) {
-        // AARCH64 masks the upper 16 bit of all memory accesses
-        vaddr &= 0xffffffffffffLL;
-
-        if (vaddr >= 1uLL << current_page_table->GetAddressSpaceBits()) {
-            LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
-                      static_cast<u32>(data), vaddr);
-            return;
-        }
-
-        // Avoid adding any extra logic to this fast-path block
-        const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
-        if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
-            std::memcpy(&pointer[vaddr], &data, sizeof(T));
-            return;
-        }
-        switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
-        case Common::PageType::Unmapped:
-            LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
-                      static_cast<u32>(data), vaddr);
-            return;
-        case Common::PageType::Memory:
-            ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
-            break;
-        case Common::PageType::RasterizerCachedMemory: {
-            u8* const host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
-            system.GPU().InvalidateRegion(vaddr, sizeof(T));
-            std::memcpy(host_ptr, &data, sizeof(T));
-            break;
-        }
-        default:
-            UNREACHABLE();
+        u8* const ptr = GetPointerImpl(
+            vaddr,
+            [vaddr, data]() {
+                LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8,
+                          vaddr, static_cast<u64>(data));
+            },
+            [&system = system, vaddr]() { system.GPU().InvalidateRegion(vaddr, sizeof(T)); });
+        if (ptr) {
+            std::memcpy(ptr, &data, sizeof(T));
        }
    }

    template <typename T>
    bool WriteExclusive(VAddr vaddr, const T data, const T expected) {
-        // AARCH64 masks the upper 16 bit of all memory accesses
-        vaddr &= 0xffffffffffffLL;
-
-        if (vaddr >= 1uLL << current_page_table->GetAddressSpaceBits()) {
-            LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
-                      static_cast<u32>(data), vaddr);
-            return true;
-        }
-
-        const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
-        if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
-            // NOTE: Avoid adding any extra logic to this fast-path block
-            const auto volatile_pointer = reinterpret_cast<volatile T*>(&pointer[vaddr]);
+        u8* const ptr = GetPointerImpl(
+            vaddr,
+            [vaddr, data]() {
+                LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}",
+                          sizeof(T) * 8, vaddr, static_cast<u64>(data));
+            },
+            [&system = system, vaddr]() { system.GPU().InvalidateRegion(vaddr, sizeof(T)); });
+        if (ptr) {
+            const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr);
            return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
        }
-        switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
-        case Common::PageType::Unmapped:
-            LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
-                      static_cast<u32>(data), vaddr);
-            return true;
-        case Common::PageType::Memory:
-            ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
-            break;
-        case Common::PageType::RasterizerCachedMemory: {
-            u8* host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
-            system.GPU().InvalidateRegion(vaddr, sizeof(T));
-            auto* pointer = reinterpret_cast<volatile T*>(&host_ptr);
-            return Common::AtomicCompareAndSwap(pointer, data, expected);
-        }
-        default:
-            UNREACHABLE();
-        }
        return true;
    }

    bool WriteExclusive128(VAddr vaddr, const u128 data, const u128 expected) {
-        // AARCH64 masks the upper 16 bit of all memory accesses
-        vaddr &= 0xffffffffffffLL;
-
-        if (vaddr >= 1uLL << current_page_table->GetAddressSpaceBits()) {
-            LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
-                      static_cast<u32>(data[0]), vaddr);
-            return true;
-        }
-
-        const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
-        if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
-            // NOTE: Avoid adding any extra logic to this fast-path block
-            const auto volatile_pointer = reinterpret_cast<volatile u64*>(&pointer[vaddr]);
+        u8* const ptr = GetPointerImpl(
+            vaddr,
+            [vaddr, data]() {
+                LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}",
+                          vaddr, static_cast<u64>(data[1]), static_cast<u64>(data[0]));
+            },
+            [&system = system, vaddr]() { system.GPU().InvalidateRegion(vaddr, sizeof(u128)); });
+        if (ptr) {
+            const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr);
            return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
        }
-        switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
-        case Common::PageType::Unmapped:
-            LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}{:016X}", sizeof(data) * 8,
-                      static_cast<u64>(data[1]), static_cast<u64>(data[0]), vaddr);
-            return true;
-        case Common::PageType::Memory:
-            ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
-            break;
-        case Common::PageType::RasterizerCachedMemory: {
-            u8* host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
-            system.GPU().InvalidateRegion(vaddr, sizeof(u128));
-            auto* pointer = reinterpret_cast<volatile u64*>(&host_ptr);
-            return Common::AtomicCompareAndSwap(pointer, data, expected);
-        }
-        default:
-            UNREACHABLE();
-        }
        return true;
    }

@@ -789,12 +584,11 @@ void Memory::UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) {
    impl->UnmapRegion(page_table, base, size);
 }

-bool Memory::IsValidVirtualAddress(const Kernel::KProcess& process, const VAddr vaddr) const {
-    return impl->IsValidVirtualAddress(process, vaddr);
-}
-
 bool Memory::IsValidVirtualAddress(const VAddr vaddr) const {
-    return impl->IsValidVirtualAddress(vaddr);
+    const Kernel::KProcess& process = *system.CurrentProcess();
+    const auto& page_table = process.PageTable().PageTableImpl();
+    const auto [pointer, type] = page_table.pointers[vaddr >> PAGE_BITS].PointerType();
+    return pointer != nullptr || type == Common::PageType::RasterizerCachedMemory;
 }

 u8* Memory::GetPointer(VAddr vaddr) {
@@ -863,64 +657,38 @@ std::string Memory::ReadCString(VAddr vaddr, std::size_t max_length) {

 void Memory::ReadBlock(const Kernel::KProcess& process, const VAddr src_addr, void* dest_buffer,
                       const std::size_t size) {
-    impl->ReadBlock(process, src_addr, dest_buffer, size);
+    impl->ReadBlockImpl<false>(process, src_addr, dest_buffer, size);
 }

 void Memory::ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
    impl->ReadBlock(src_addr, dest_buffer, size);
 }

-void Memory::ReadBlockUnsafe(const Kernel::KProcess& process, const VAddr src_addr,
-                             void* dest_buffer, const std::size_t size) {
-    impl->ReadBlockUnsafe(process, src_addr, dest_buffer, size);
-}
-
 void Memory::ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
    impl->ReadBlockUnsafe(src_addr, dest_buffer, size);
 }

 void Memory::WriteBlock(const Kernel::KProcess& process, VAddr dest_addr, const void* src_buffer,
                        std::size_t size) {
-    impl->WriteBlock(process, dest_addr, src_buffer, size);
+    impl->WriteBlockImpl<false>(process, dest_addr, src_buffer, size);
 }

 void Memory::WriteBlock(const VAddr dest_addr, const void* src_buffer, const std::size_t size) {
    impl->WriteBlock(dest_addr, src_buffer, size);
 }

-void Memory::WriteBlockUnsafe(const Kernel::KProcess& process, VAddr dest_addr,
-                              const void* src_buffer, std::size_t size) {
-    impl->WriteBlockUnsafe(process, dest_addr, src_buffer, size);
-}
-
 void Memory::WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer,
                              const std::size_t size) {
    impl->WriteBlockUnsafe(dest_addr, src_buffer, size);
 }

-void Memory::ZeroBlock(const Kernel::KProcess& process, VAddr dest_addr, std::size_t size) {
-    impl->ZeroBlock(process, dest_addr, size);
-}
-
-void Memory::ZeroBlock(VAddr dest_addr, std::size_t size) {
-    impl->ZeroBlock(dest_addr, size);
-}
-
 void Memory::CopyBlock(const Kernel::KProcess& process, VAddr dest_addr, VAddr src_addr,
                       const std::size_t size) {
    impl->CopyBlock(process, dest_addr, src_addr, size);
 }

-void Memory::CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size) {
-    impl->CopyBlock(dest_addr, src_addr, size);
-}
-
 void Memory::RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
    impl->RasterizerMarkRegionCached(vaddr, size, cached);
 }

-bool IsKernelVirtualAddress(const VAddr vaddr) {
-    return KERNEL_REGION_VADDR <= vaddr && vaddr < KERNEL_REGION_END;
-}
-
 } // namespace Core::Memory
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -39,11 +39,6 @@ enum : VAddr {

    /// Application stack
    DEFAULT_STACK_SIZE = 0x100000,
-
-    /// Kernel Virtual Address Range
-    KERNEL_REGION_VADDR = 0xFFFFFF8000000000,
-    KERNEL_REGION_SIZE = 0x7FFFE00000,
-    KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE,
 };

 /// Central class that handles all memory operations and state.
@@ -56,7 +51,7 @@ public:
    Memory& operator=(const Memory&) = delete;

    Memory(Memory&&) = default;
-    Memory& operator=(Memory&&) = default;
+    Memory& operator=(Memory&&) = delete;

    /**
     * Resets the state of the Memory system.
@@ -90,17 +85,6 @@ public:
     */
    void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size);

-    /**
-     * Checks whether or not the supplied address is a valid virtual
-     * address for the given process.
-     *
-     * @param process The emulated process to check the address against.
-     * @param vaddr   The virtual address to check the validity of.
-     *
-     * @returns True if the given virtual address is valid, false otherwise.
-     */
-    bool IsValidVirtualAddress(const Kernel::KProcess& process, VAddr vaddr) const;
-
    /**
     * Checks whether or not the supplied address is a valid virtual
     * address for the current process.
@@ -109,7 +93,7 @@ public:
     *
     * @returns True if the given virtual address is valid, false otherwise.
     */
-    bool IsValidVirtualAddress(VAddr vaddr) const;
+    [[nodiscard]] bool IsValidVirtualAddress(VAddr vaddr) const;

    /**
     * Gets a pointer to the given address.
@@ -134,7 +118,7 @@ public:
     * @returns The pointer to the given address, if the address is valid.
     *          If the address is not valid, nullptr will be returned.
     */
-    const u8* GetPointer(VAddr vaddr) const;
+    [[nodiscard]] const u8* GetPointer(VAddr vaddr) const;

    template <typename T>
    const T* GetPointer(VAddr vaddr) const {
@@ -327,27 +311,6 @@ public:
    void ReadBlock(const Kernel::KProcess& process, VAddr src_addr, void* dest_buffer,
                   std::size_t size);

-    /**
-     * Reads a contiguous block of bytes from a specified process' address space.
-     * This unsafe version does not trigger GPU flushing.
-     *
-     * @param process     The process to read the data from.
-     * @param src_addr    The virtual address to begin reading from.
-     * @param dest_buffer The buffer to place the read bytes into.
-     * @param size        The amount of data to read, in bytes.
-     *
-     * @note If a size of 0 is specified, then this function reads nothing and
-     *       no attempts to access memory are made at all.
-     *
-     * @pre dest_buffer must be at least size bytes in length, otherwise a
-     *      buffer overrun will occur.
-     *
-     * @post The range [dest_buffer, size) contains the read bytes from the
-     *       process' address space.
-     */
-    void ReadBlockUnsafe(const Kernel::KProcess& process, VAddr src_addr, void* dest_buffer,
-                         std::size_t size);
-
    /**
     * Reads a contiguous block of bytes from the current process' address space.
     *
@@ -408,26 +371,6 @@ public:
    void WriteBlock(const Kernel::KProcess& process, VAddr dest_addr, const void* src_buffer,
                    std::size_t size);

-    /**
-     * Writes a range of bytes into a given process' address space at the specified
-     * virtual address.
-     * This unsafe version does not invalidate GPU Memory.
-     *
-     * @param process    The process to write data into the address space of.
-     * @param dest_addr  The destination virtual address to begin writing the data at.
-     * @param src_buffer The data to write into the process' address space.
-     * @param size       The size of the data to write, in bytes.
-     *
-     * @post The address range [dest_addr, size) in the process' address space
-     *       contains the data that was within src_buffer.
-     *
-     * @post If an attempt is made to write into an unmapped region of memory, the writes
-     *       will be ignored and an error will be logged.
-     *
-     */
-    void WriteBlockUnsafe(const Kernel::KProcess& process, VAddr dest_addr, const void* src_buffer,
-                          std::size_t size);
-
    /**
     * Writes a range of bytes into the current process' address space at the specified
     * virtual address.
@@ -467,29 +410,6 @@ public:
     */
    void WriteBlockUnsafe(VAddr dest_addr, const void* src_buffer, std::size_t size);

-    /**
-     * Fills the specified address range within a process' address space with zeroes.
-     *
-     * @param process   The process that will have a portion of its memory zeroed out.
-     * @param dest_addr The starting virtual address of the range to zero out.
-     * @param size      The size of the address range to zero out, in bytes.
-     *
-     * @post The range [dest_addr, size) within the process' address space is
-     *       filled with zeroes.
-     */
-    void ZeroBlock(const Kernel::KProcess& process, VAddr dest_addr, std::size_t size);
-
-    /**
-     * Fills the specified address range within the current process' address space with zeroes.
-     *
-     * @param dest_addr The starting virtual address of the range to zero out.
-     * @param size      The size of the address range to zero out, in bytes.
-     *
-     * @post The range [dest_addr, size) within the current process' address space is
-     *       filled with zeroes.
-     */
-    void ZeroBlock(VAddr dest_addr, std::size_t size);
-
    /**
     * Copies data within a process' address space to another location within the
     * same address space.
@@ -505,19 +425,6 @@ public:
    void CopyBlock(const Kernel::KProcess& process, VAddr dest_addr, VAddr src_addr,
                   std::size_t size);

-    /**
-     * Copies data within the current process' address space to another location within the
-     * same address space.
-     *
-     * @param dest_addr The destination virtual address to begin copying the data into.
-     * @param src_addr  The source virtual address to begin copying the data from.
-     * @param size      The size of the data to copy, in bytes.
-     *
-     * @post The range [dest_addr, size) within the current process' address space
-     *       contains the same data within the range [src_addr, size).
-     */
-    void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size);
-
    /**
     * Marks each page within the specified address range as cached or uncached.
     *
@@ -535,7 +442,4 @@ private:
    std::unique_ptr<Impl> impl;
 };

-/// Determines if the given VAddr is a kernel address
-bool IsKernelVirtualAddress(VAddr vaddr);
-
 } // namespace Core::Memory
--- a/src/core/network/network.cpp
+++ b/src/core/network/network.cpp
@@ -10,9 +10,10 @@
 #include "common/common_funcs.h"

 #ifdef _WIN32
-#define _WINSOCK_DEPRECATED_NO_WARNINGS // gethostname
 #include <winsock2.h>
+#include <ws2tcpip.h>
 #elif YUZU_UNIX
+#include <arpa/inet.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <netdb.h>
@@ -27,7 +28,9 @@
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
+#include "common/settings.h"
 #include "core/network/network.h"
+#include "core/network/network_interface.h"
 #include "core/network/sockets.h"

 namespace Network {
@@ -47,11 +50,6 @@ void Finalize() {
    WSACleanup();
 }

-constexpr IPv4Address TranslateIPv4(in_addr addr) {
-    auto& bytes = addr.S_un.S_un_b;
-    return IPv4Address{bytes.s_b1, bytes.s_b2, bytes.s_b3, bytes.s_b4};
-}
-
 sockaddr TranslateFromSockAddrIn(SockAddrIn input) {
    sockaddr_in result;

@@ -138,12 +136,6 @@ void Initialize() {}

 void Finalize() {}

-constexpr IPv4Address TranslateIPv4(in_addr addr) {
-    const u32 bytes = addr.s_addr;
-    return IPv4Address{static_cast<u8>(bytes), static_cast<u8>(bytes >> 8),
-                       static_cast<u8>(bytes >> 16), static_cast<u8>(bytes >> 24)};
-}
-
 sockaddr TranslateFromSockAddrIn(SockAddrIn input) {
    sockaddr_in result;

@@ -182,7 +174,7 @@ linger MakeLinger(bool enable, u32 linger_value) {
 }

 bool EnableNonBlock(int fd, bool enable) {
-    int flags = fcntl(fd, F_GETFD);
+    int flags = fcntl(fd, F_GETFL);
    if (flags == -1) {
        return false;
    }
@@ -191,7 +183,7 @@ bool EnableNonBlock(int fd, bool enable) {
    } else {
        flags &= ~O_NONBLOCK;
    }
-    return fcntl(fd, F_SETFD, flags) == 0;
+    return fcntl(fd, F_SETFL, flags) == 0;
 }

 Errno TranslateNativeError(int e) {
@@ -227,8 +219,12 @@ Errno GetAndLogLastError() {
 #else
    int e = errno;
 #endif
+    const Errno err = TranslateNativeError(e);
+    if (err == Errno::AGAIN) {
+        return err;
+    }
    LOG_ERROR(Network, "Socket operation error: {}", NativeErrorToString(e));
-    return TranslateNativeError(e);
+    return err;
 }

 int TranslateDomain(Domain domain) {
@@ -353,27 +349,29 @@ NetworkInstance::~NetworkInstance() {
    Finalize();
 }

-std::pair<IPv4Address, Errno> GetHostIPv4Address() {
-    std::array<char, 256> name{};
-    if (gethostname(name.data(), static_cast<int>(name.size()) - 1) == SOCKET_ERROR) {
-        return {IPv4Address{}, GetAndLogLastError()};
+/// Looks up the IPv4 address of the network interface chosen in the settings.
+///
+/// @return The address in human order (e.g. 192.168.0.1), or std::nullopt when no usable
+///         interface is available or the selected interface cannot be found; the reason is
+///         logged by GetSelectedNetworkInterface().
+std::optional<IPv4Address> GetHostIPv4Address() {
+    // Selecting the interface (and logging why none could be selected) is already implemented
+    // by GetSelectedNetworkInterface(); reuse it rather than repeating the lookup here.
+    const auto iface = Network::GetSelectedNetworkInterface();
+    if (!iface.has_value()) {
+        return std::nullopt;
    }

-    hostent* const ent = gethostbyname(name.data());
-    if (!ent) {
-        return {IPv4Address{}, GetAndLogLastError()};
-    }
-    if (ent->h_addr_list == nullptr) {
-        UNIMPLEMENTED_MSG("No addr provided in hostent->h_addr_list");
-        return {IPv4Address{}, Errno::SUCCESS};
-    }
-    if (ent->h_length != sizeof(in_addr)) {
-        UNIMPLEMENTED_MSG("Unexpected size={} in hostent->h_length", ent->h_length);
-    }
+    // Format the address as dotted-decimal text; the string is only used for the log line.
+    // INET_ADDRSTRLEN covers the longest IPv4 string including the terminating NUL.
+    char ip_addr[INET_ADDRSTRLEN] = {};
+    ASSERT(inet_ntop(AF_INET, &iface->ip_address, ip_addr, sizeof(ip_addr)) != nullptr);

-    in_addr addr;
-    std::memcpy(&addr, ent->h_addr_list[0], sizeof(addr));
-    return {TranslateIPv4(addr), Errno::SUCCESS};
+    LOG_INFO(Network, "IP address: {}", ip_addr);
+
+    // The caller wants the octets in human order rather than the native in_addr, so
+    // translate before returning.
+    return TranslateIPv4(iface->ip_address);
 }

 std::pair<s32, Errno> Poll(std::vector<PollFD>& pollfds, s32 timeout) {
--- a/src/core/network/network.h
+++ b/src/core/network/network.h
@@ -5,11 +5,18 @@
 #pragma once

 #include <array>
+#include <optional>
 #include <utility>

 #include "common/common_funcs.h"
 #include "common/common_types.h"

+#ifdef _WIN32
+#include <winsock2.h>
+#elif YUZU_UNIX
+#include <netinet/in.h>
+#endif
+
 namespace Network {

 class Socket;
@@ -92,8 +99,21 @@ public:
    ~NetworkInstance();
 };

+#ifdef _WIN32
+constexpr IPv4Address TranslateIPv4(in_addr addr) { // in_addr -> array of four octets
+    auto& bytes = addr.S_un.S_un_b; // per-octet view of the address
+    return IPv4Address{bytes.s_b1, bytes.s_b2, bytes.s_b3, bytes.s_b4};
+}
+#elif YUZU_UNIX
+constexpr IPv4Address TranslateIPv4(in_addr addr) { // in_addr -> array of four octets
+    const u32 bytes = addr.s_addr; // NOTE(review): presumably assumes a little-endian host
+    return IPv4Address{static_cast<u8>(bytes), static_cast<u8>(bytes >> 8),
+                       static_cast<u8>(bytes >> 16), static_cast<u8>(bytes >> 24)};
+}
+#endif
+
 /// @brief Returns host's IPv4 address
-/// @return Pair of an array of human ordered IPv4 address (e.g. 192.168.0.1) and an error code
-std::pair<IPv4Address, Errno> GetHostIPv4Address();
+/// @return human ordered IPv4 address (e.g. 192.168.0.1) as an array
+std::optional<IPv4Address> GetHostIPv4Address();

 } // namespace Network
--- a/src/core/network/network_interface.cpp
+++ b/src/core/network/network_interface.cpp
@@ -0,0 +1,210 @@
+// Copyright 2021 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <fstream>
+#include <sstream>
+#include <vector>
+
+#include "common/bit_cast.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "common/settings.h"
+#include "common/string_util.h"
+#include "core/network/network_interface.h"
+
+#ifdef _WIN32
+#include <iphlpapi.h>
+#else
+#include <cerrno>
+#include <ifaddrs.h>
+#include <net/if.h>
+#endif
+
+namespace Network {
+
+#ifdef _WIN32
+
+std::vector<NetworkInterface> GetAvailableNetworkInterfaces() { // Windows implementation
+    std::vector<IP_ADAPTER_ADDRESSES> adapter_addresses;
+    DWORD ret = ERROR_BUFFER_OVERFLOW;
+    DWORD buf_size = 0; // size in bytes passed to and updated by GetAdaptersAddresses
+
+    // Retry up to 5 times, growing the buffer whenever the call reports ERROR_BUFFER_OVERFLOW
+    for (int i = 0; i < 5 && ret == ERROR_BUFFER_OVERFLOW; i++) {
+        ret = GetAdaptersAddresses(
+            AF_INET, GAA_FLAG_SKIP_MULTICAST | GAA_FLAG_SKIP_DNS_SERVER | GAA_FLAG_INCLUDE_GATEWAYS,
+            nullptr, adapter_addresses.data(), &buf_size);
+
+        if (ret != ERROR_BUFFER_OVERFLOW) { // success or a different error: stop retrying
+            break;
+        }
+
+        adapter_addresses.resize((buf_size / sizeof(IP_ADAPTER_ADDRESSES)) + 1);
+    }
+
+    if (ret != NO_ERROR) { // also reached when all 5 attempts overflowed
+        LOG_ERROR(Network, "Failed to get network interfaces with GetAdaptersAddresses");
+        return {};
+    }
+
+    std::vector<NetworkInterface> result;
+
+    for (auto current_address = adapter_addresses.data(); current_address != nullptr;
+         current_address = current_address->Next) { // walk the linked list of adapters
+        if (current_address->FirstUnicastAddress == nullptr ||
+            current_address->FirstUnicastAddress->Address.lpSockaddr == nullptr) {
+            continue; // adapter has no unicast address to report
+        }
+
+        if (current_address->OperStatus != IfOperStatusUp) {
+            continue; // adapter is not up
+        }
+
+        const auto ip_addr = Common::BitCast<struct sockaddr_in>(
+                                 *current_address->FirstUnicastAddress->Address.lpSockaddr)
+                                 .sin_addr; // only the first unicast address is used
+
+        ULONG mask = 0; // filled in from the prefix length of that address
+        if (ConvertLengthToIpv4Mask(current_address->FirstUnicastAddress->OnLinkPrefixLength,
+                                    &mask) != NO_ERROR) {
+            LOG_ERROR(Network, "Failed to convert IPv4 prefix length to subnet mask");
+            continue;
+        }
+
+        struct in_addr gateway = {.S_un{.S_addr{0}}}; // stays 0 if no gateway is listed
+        if (current_address->FirstGatewayAddress != nullptr &&
+            current_address->FirstGatewayAddress->Address.lpSockaddr != nullptr) {
+            gateway = Common::BitCast<struct sockaddr_in>(
+                          *current_address->FirstGatewayAddress->Address.lpSockaddr)
+                          .sin_addr; // only the first gateway address is used
+        }
+
+        result.emplace_back(NetworkInterface{
+            .name{Common::UTF16ToUTF8(std::wstring{current_address->FriendlyName})},
+            .ip_address{ip_addr},
+            .subnet_mask = in_addr{.S_un{.S_addr{mask}}},
+            .gateway = gateway});
+    }
+
+    return result;
+}
+
+#else
+
+// Looks up the default gateway of the interface called iface_name in "/proc/net/route".
+// Returns the gateway exactly as parsed from the table (the caller stores it into
+// in_addr::s_addr unchanged), or 0 when the table cannot be opened or holds no default route
+// through a gateway for that interface.
+static u32 GetDefaultGateway(const char* iface_name) {
+    std::ifstream file{"/proc/net/route"};
+    if (!file.is_open()) {
+        LOG_ERROR(Network, "Failed to open \"/proc/net/route\"");
+        return 0;
+    }
+
+    // The first line only holds the column titles, skip it.
+    std::string line;
+    std::getline(file, line);
+
+    while (std::getline(file, line)) {
+        std::istringstream iss{line};
+
+        // The first column is the interface name.
+        std::string name;
+        if (!(iss >> name) || name != iface_name) {
+            continue;
+        }
+
+        // Destination, gateway and flags follow as hexadecimal numbers.
+        u32 dest{};
+        u32 gateway{};
+        u16 flags{};
+        if (!(iss >> std::hex >> dest >> gateway >> flags)) {
+            // Malformed entry: ignore it instead of acting on partially parsed values.
+            continue;
+        }
+
+        // Only a default route (destination 0) that goes through a gateway is of interest.
+        // 0x2 is the flag RTF_GATEWAY (defined in <linux/route.h>).
+        if (dest == 0 && (flags & 0x2) != 0) {
+            return gateway;
+        }
+    }
+
+    // No matching default route was found.
+    return 0;
+}
+
+// Enumerates the host's IPv4 network interfaces with getifaddrs.
+//
+// Interfaces that are down, loopback interfaces and entries without an address or netmask are
+// skipped. Returns an empty list when getifaddrs fails.
+std::vector<NetworkInterface> GetAvailableNetworkInterfaces() {
+    struct ifaddrs* ifaddr = nullptr;
+
+    if (getifaddrs(&ifaddr) != 0) {
+        LOG_ERROR(Network, "Failed to get network interfaces with getifaddrs: {}",
+                  std::strerror(errno));
+        return {};
+    }
+
+    std::vector<NetworkInterface> result;
+
+    for (auto ifa = ifaddr; ifa != nullptr; ifa = ifa->ifa_next) {
+        // Entries without an address or netmask cannot be described.
+        if (ifa->ifa_addr == nullptr || ifa->ifa_netmask == nullptr) {
+            continue;
+        }
+
+        // Only IPv4 addresses are reported.
+        if (ifa->ifa_addr->sa_family != AF_INET) {
+            continue;
+        }
+
+        // Skip interfaces that are down as well as loopback interfaces.
+        if ((ifa->ifa_flags & IFF_UP) == 0 || (ifa->ifa_flags & IFF_LOOPBACK) != 0) {
+            continue;
+        }
+
+        // 0 when no default gateway is known for this interface.
+        const u32 gateway = GetDefaultGateway(ifa->ifa_name);
+
+        result.emplace_back(NetworkInterface{
+            .name{ifa->ifa_name},
+            .ip_address{Common::BitCast<struct sockaddr_in>(*ifa->ifa_addr).sin_addr},
+            .subnet_mask{Common::BitCast<struct sockaddr_in>(*ifa->ifa_netmask).sin_addr},
+            .gateway{in_addr{.s_addr = gateway}}});
+    }
+
+    // The list returned by getifaddrs has to be released with freeifaddrs.
+    freeifaddrs(ifaddr);
+
+    return result;
+}
+
+#endif
+
+std::optional<NetworkInterface> GetSelectedNetworkInterface() {
+    // Name of the interface picked in the settings.
+    const auto& wanted_name = Settings::values.network_interface.GetValue();
+    const auto candidates = Network::GetAvailableNetworkInterfaces();
+    if (candidates.empty()) {
+        LOG_ERROR(Network, "GetAvailableNetworkInterfaces returned no interfaces");
+        return std::nullopt;
+    }
+
+    // Return the first interface whose name matches the configured one.
+    for (const auto& candidate : candidates) {
+        if (candidate.name == wanted_name) {
+            return candidate;
+        }
+    }
+
+    // Nothing matched: report the problem and let the caller handle the missing interface.
+    LOG_ERROR(Network, "Couldn't find selected interface \"{}\"", wanted_name);
+    return std::nullopt;
+}
+
+} // namespace Network
--- a/src/core/network/network_interface.h
+++ b/src/core/network/network_interface.h
@@ -0,0 +1,29 @@
+// Copyright 2021 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <optional>
+#include <string>
+#include <vector>
+
+#ifdef _WIN32
+#include <winsock2.h>
+#else
+#include <netinet/in.h>
+#endif
+
+namespace Network {
+
+struct NetworkInterface { // description of one host network interface
+    std::string name;           ///< interface name as reported by the host
+    struct in_addr ip_address;  ///< IPv4 address of the interface
+    struct in_addr subnet_mask; ///< IPv4 subnet mask of the interface
+    struct in_addr gateway;     ///< default gateway, zero when none was found
+};
+
+std::vector<NetworkInterface> GetAvailableNetworkInterfaces(); ///< IPv4 interfaces that are up
+std::optional<NetworkInterface> GetSelectedNetworkInterface(); ///< the one named in settings
+
+} // namespace Network
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -72,6 +72,18 @@ static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy backend) {
    return "Unknown";
 }

+static const char* TranslateNvdecEmulation(Settings::NvdecEmulation backend) {
+    switch (backend) { // maps each NVDEC emulation mode to the string reported in telemetry
+    case Settings::NvdecEmulation::Off:
+        return "Off";
+    case Settings::NvdecEmulation::CPU:
+        return "CPU";
+    case Settings::NvdecEmulation::GPU:
+        return "GPU";
+    }
+    return "Unknown"; // reached only when the value matches none of the cases above
+}
+
 u64 GetTelemetryId() {
    u64 telemetry_id{};
    const auto filename = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ConfigDir) / "telemetry_id";
@@ -229,8 +241,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader,
             TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy.GetValue()));
    AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
             Settings::values.use_asynchronous_gpu_emulation.GetValue());
-    AddField(field_type, "Renderer_UseNvdecEmulation",
-             Settings::values.use_nvdec_emulation.GetValue());
+    AddField(field_type, "Renderer_NvdecEmulation",
+             TranslateNvdecEmulation(Settings::values.nvdec_emulation.GetValue()));
    AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue());
    AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue());
    AddField(field_type, "Renderer_ShaderBackend",
--- a/src/input_common/main.cpp
+++ b/src/input_common/main.cpp
@@ -304,10 +304,10 @@ std::vector<std::unique_ptr<Polling::DevicePoller>> InputSubsystem::GetPollers([
 }

 std::string GenerateKeyboardParam(int key_code) {
-    Common::ParamPackage param{
-        {"engine", "keyboard"},
-        {"code", std::to_string(key_code)},
-    };
+    Common::ParamPackage param; // filled key by key, then serialized for the caller
+    param.Set("engine", "keyboard");
+    param.Set("code", key_code);
+    param.Set("toggle", false); // new keyboard bindings start with toggle disabled
    return param.Serialize();
 }

--- a/src/input_common/mouse/mouse_poller.cpp
+++ b/src/input_common/mouse/mouse_poller.cpp
@@ -57,6 +57,7 @@ Common::ParamPackage MouseButtonFactory::GetNextInput() const {
        if (pad.button != MouseInput::MouseButton::Undefined) {
            params.Set("engine", "mouse");
            params.Set("button", static_cast<u16>(pad.button));
+            params.Set("toggle", false);
            return params;
        }
    }
--- a/src/input_common/sdl/sdl_impl.cpp
+++ b/src/input_common/sdl/sdl_impl.cpp
@@ -82,6 +82,12 @@ public:
        state.buttons.insert_or_assign(button, value);
    }

+    void PreSetButton(int button) {
+        // Check and insert under one lock; a value that is already stored is left untouched.
+        std::lock_guard lock{mutex};
+        state.buttons.try_emplace(button, false);
+    }
+
    void SetMotion(SDL_ControllerSensorEvent event) {
        constexpr float gravity_constant = 9.80665f;
        std::lock_guard lock{mutex};
@@ -155,9 +161,16 @@ public:
        state.axes.insert_or_assign(axis, value);
    }

-    float GetAxis(int axis, float range) const {
+    void PreSetAxis(int axis) {
+        // Check and insert under one lock; a value that is already stored is left untouched.
+        std::lock_guard lock{mutex};
+        state.axes.try_emplace(axis, 0);
+    }
+
+    float GetAxis(int axis, float range, float offset) const { // axis must have been set before
        std::lock_guard lock{mutex};
-        return static_cast<float>(state.axes.at(axis)) / (32767.0f * range);
+        const float value = static_cast<float>(state.axes.at(axis)) / 32767.0f;
+        return (value + offset) / range; // offset is added before dividing by range
    }

    bool RumblePlay(u16 amp_low, u16 amp_high) {
@@ -174,9 +187,10 @@ public:
        return false;
    }

-    std::tuple<float, float> GetAnalog(int axis_x, int axis_y, float range) const {
-        float x = GetAxis(axis_x, range);
-        float y = GetAxis(axis_y, range);
+    std::tuple<float, float> GetAnalog(int axis_x, int axis_y, float range, float offset_x,
+                                       float offset_y) const {
+        float x = GetAxis(axis_x, range, offset_x);
+        float y = GetAxis(axis_y, range, offset_y);
        y = -y; // 3DS uses an y-axis inverse from SDL

        // Make sure the coordinates are in the unit circle,
@@ -483,7 +497,7 @@ public:
          trigger_if_greater(trigger_if_greater_) {}

    bool GetStatus() const override {
-        const float axis_value = joystick->GetAxis(axis, 1.0f);
+        const float axis_value = joystick->GetAxis(axis, 1.0f, 0.0f);
        if (trigger_if_greater) {
            return axis_value > threshold;
        }
@@ -500,12 +514,14 @@ private:
 class SDLAnalog final : public Input::AnalogDevice {
 public:
    explicit SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_,
-                       bool invert_x_, bool invert_y_, float deadzone_, float range_)
+                       bool invert_x_, bool invert_y_, float deadzone_, float range_,
+                       float offset_x_, float offset_y_)
        : joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_), invert_x(invert_x_),
-          invert_y(invert_y_), deadzone(deadzone_), range(range_) {}
+          invert_y(invert_y_), deadzone(deadzone_), range(range_), offset_x(offset_x_),
+          offset_y(offset_y_) {}

    std::tuple<float, float> GetStatus() const override {
-        auto [x, y] = joystick->GetAnalog(axis_x, axis_y, range);
+        auto [x, y] = joystick->GetAnalog(axis_x, axis_y, range, offset_x, offset_y);
        const float r = std::sqrt((x * x) + (y * y));
        if (invert_x) {
            x = -x;
@@ -522,8 +538,8 @@ public:
    }

    std::tuple<float, float> GetRawStatus() const override {
-        const float x = joystick->GetAxis(axis_x, range);
-        const float y = joystick->GetAxis(axis_y, range);
+        const float x = joystick->GetAxis(axis_x, range, offset_x);
+        const float y = joystick->GetAxis(axis_y, range, offset_y);
        return {x, -y};
    }

@@ -555,6 +571,8 @@ private:
    const bool invert_y;
    const float deadzone;
    const float range;
+    const float offset_x;
+    const float offset_y;
 };

 class SDLVibration final : public Input::VibrationDevice {
@@ -621,7 +639,7 @@ public:
          trigger_if_greater(trigger_if_greater_) {}

    Input::MotionStatus GetStatus() const override {
-        const float axis_value = joystick->GetAxis(axis, 1.0f);
+        const float axis_value = joystick->GetAxis(axis, 1.0f, 0.0f);
        bool trigger = axis_value < threshold;
        if (trigger_if_greater) {
            trigger = axis_value > threshold;
@@ -720,13 +738,13 @@ public:
                LOG_ERROR(Input, "Unknown direction {}", direction_name);
            }
            // This is necessary so accessing GetAxis with axis won't crash
-            joystick->SetAxis(axis, 0);
+            joystick->PreSetAxis(axis);
            return std::make_unique<SDLAxisButton>(joystick, axis, threshold, trigger_if_greater);
        }

        const int button = params.Get("button", 0);
        // This is necessary so accessing GetButton with button won't crash
-        joystick->SetButton(button, false);
+        joystick->PreSetButton(button);
        return std::make_unique<SDLButton>(joystick, button, toggle);
    }

@@ -757,13 +775,15 @@ public:
        const std::string invert_y_value = params.Get("invert_y", "+");
        const bool invert_x = invert_x_value == "-";
        const bool invert_y = invert_y_value == "-";
+        const float offset_x = params.Get("offset_x", 0.0f);
+        const float offset_y = params.Get("offset_y", 0.0f);
        auto joystick = state.GetSDLJoystickByGUID(guid, port);

        // This is necessary so accessing GetAxis with axis_x and axis_y won't crash
-        joystick->SetAxis(axis_x, 0);
-        joystick->SetAxis(axis_y, 0);
+        joystick->PreSetAxis(axis_x);
+        joystick->PreSetAxis(axis_y);
        return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y, invert_x, invert_y, deadzone,
-                                           range);
+                                           range, offset_x, offset_y);
    }

 private:
@@ -844,13 +864,13 @@ public:
                LOG_ERROR(Input, "Unknown direction {}", direction_name);
            }
            // This is necessary so accessing GetAxis with axis won't crash
-            joystick->SetAxis(axis, 0);
+            joystick->PreSetAxis(axis);
            return std::make_unique<SDLAxisMotion>(joystick, axis, threshold, trigger_if_greater);
        }

        const int button = params.Get("button", 0);
        // This is necessary so accessing GetButton with button won't crash
-        joystick->SetButton(button, false);
+        joystick->PreSetButton(button);
        return std::make_unique<SDLButtonMotion>(joystick, button);
    }

@@ -869,6 +889,9 @@ SDLState::SDLState() {
    RegisterFactory<VibrationDevice>("sdl", vibration_factory);
    RegisterFactory<MotionDevice>("sdl", motion_factory);

+    // Disable raw input. When enabled this setting causes SDL to die when a web applet opens
+    SDL_SetHint(SDL_HINT_JOYSTICK_RAWINPUT, "0");
+
    // Enable HIDAPI rumble. This prevents SDL from disabling motion on PS4 and PS5 controllers
    SDL_SetHint(SDL_HINT_JOYSTICK_HIDAPI_PS4_RUMBLE, "1");
    SDL_SetHint(SDL_HINT_JOYSTICK_HIDAPI_PS5_RUMBLE, "1");
@@ -995,6 +1018,7 @@ Common::ParamPackage BuildButtonParamPackageForButton(int port, std::string guid
    params.Set("port", port);
    params.Set("guid", std::move(guid));
    params.Set("button", button);
+    params.Set("toggle", false);
    return params;
 }

@@ -1134,13 +1158,15 @@ Common::ParamPackage BuildParamPackageForBinding(int port, const std::string& gu
 }

 Common::ParamPackage BuildParamPackageForAnalog(int port, const std::string& guid, int axis_x,
-                                                int axis_y) {
+                                                int axis_y, float offset_x, float offset_y) {
    Common::ParamPackage params;
    params.Set("engine", "sdl");
    params.Set("port", port);
    params.Set("guid", guid);
    params.Set("axis_x", axis_x);
    params.Set("axis_y", axis_y);
+    params.Set("offset_x", offset_x);
+    params.Set("offset_y", offset_y);
    params.Set("invert_x", "+");
    params.Set("invert_y", "+");
    return params;
@@ -1342,24 +1368,39 @@ AnalogMapping SDLState::GetAnalogMappingForDevice(const Common::ParamPackage& pa
    const auto& binding_left_y =
        SDL_GameControllerGetBindForAxis(controller, SDL_CONTROLLER_AXIS_LEFTY);
    if (params.Has("guid2")) {
+        joystick2->PreSetAxis(binding_left_x.value.axis);
+        joystick2->PreSetAxis(binding_left_y.value.axis);
+        const auto left_offset_x = -joystick2->GetAxis(binding_left_x.value.axis, 1.0f, 0);
+        const auto left_offset_y = -joystick2->GetAxis(binding_left_y.value.axis, 1.0f, 0);
        mapping.insert_or_assign(
            Settings::NativeAnalog::LStick,
            BuildParamPackageForAnalog(joystick2->GetPort(), joystick2->GetGUID(),
-                                       binding_left_x.value.axis, binding_left_y.value.axis));
+                                       binding_left_x.value.axis, binding_left_y.value.axis,
+                                       left_offset_x, left_offset_y));
    } else {
+        joystick->PreSetAxis(binding_left_x.value.axis);
+        joystick->PreSetAxis(binding_left_y.value.axis);
+        const auto left_offset_x = -joystick->GetAxis(binding_left_x.value.axis, 1.0f, 0);
+        const auto left_offset_y = -joystick->GetAxis(binding_left_y.value.axis, 1.0f, 0);
        mapping.insert_or_assign(
            Settings::NativeAnalog::LStick,
            BuildParamPackageForAnalog(joystick->GetPort(), joystick->GetGUID(),
-                                       binding_left_x.value.axis, binding_left_y.value.axis));
+                                       binding_left_x.value.axis, binding_left_y.value.axis,
+                                       left_offset_x, left_offset_y));
    }
    const auto& binding_right_x =
        SDL_GameControllerGetBindForAxis(controller, SDL_CONTROLLER_AXIS_RIGHTX);
    const auto& binding_right_y =
        SDL_GameControllerGetBindForAxis(controller, SDL_CONTROLLER_AXIS_RIGHTY);
+    joystick->PreSetAxis(binding_right_x.value.axis);
+    joystick->PreSetAxis(binding_right_y.value.axis);
+    const auto right_offset_x = -joystick->GetAxis(binding_right_x.value.axis, 1.0f, 0);
+    const auto right_offset_y = -joystick->GetAxis(binding_right_y.value.axis, 1.0f, 0);
    mapping.insert_or_assign(Settings::NativeAnalog::RStick,
                             BuildParamPackageForAnalog(joystick->GetPort(), joystick->GetGUID(),
                                                        binding_right_x.value.axis,
-                                                        binding_right_y.value.axis));
+                                                        binding_right_y.value.axis, right_offset_x,
+                                                        right_offset_y));
    return mapping;
 }

@@ -1563,8 +1604,9 @@ public:
            }

            if (const auto joystick = state.GetSDLJoystickBySDLID(event.jaxis.which)) {
+                // Use a zero offset: the stick is deflected while being mapped, not centered
                auto params = BuildParamPackageForAnalog(joystick->GetPort(), joystick->GetGUID(),
-                                                         first_axis, axis);
+                                                         first_axis, axis, 0, 0);
                first_axis = -1;
                return params;
            }
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -15,6 +15,8 @@

 namespace Shader::Backend::SPIRV {
 namespace {
+constexpr size_t NUM_FIXEDFNCTEXTURE = 10;
+
 enum class Operation {
    Increment,
    Decrement,
@@ -427,6 +429,16 @@ Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) {
        return pointer_type;
    }
 }
+
+size_t FindNextUnusedLocation(const std::bitset<IR::NUM_GENERICS>& used_locations,
+                              size_t start_offset) { // first clear bit at or after start_offset
+    for (size_t location = start_offset; location < used_locations.size(); ++location) {
+        if (!used_locations.test(location)) {
+            return location; // the caller is responsible for marking it as used
+        }
+    }
+    throw RuntimeError("Unable to get an unused location for legacy attribute"); // all taken
+}
 } // Anonymous namespace

 void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
@@ -1227,6 +1239,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
        loads[IR::Attribute::TessellationEvaluationPointV]) {
        tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord);
    }
+    std::bitset<IR::NUM_GENERICS> used_locations{};
    for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
        const AttributeType input_type{runtime_info.generic_input_types[index]};
        if (!runtime_info.previous_stage_stores.Generic(index)) {
@@ -1238,6 +1251,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
        if (input_type == AttributeType::Disabled) {
            continue;
        }
+        used_locations.set(index);
        const Id type{GetAttributeType(*this, input_type)};
        const Id id{DefineInput(*this, type, true)};
        Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
@@ -1263,6 +1277,26 @@ void EmitContext::DefineInputs(const IR::Program& program) {
            break;
        }
    }
+    // Legacy (fixed function) varyings have no generic index of their own, so each one that is
+    // loaded is given the next location that is not already marked in used_locations.
+    size_t previous_unused_location = 0;
+    if (loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
+        const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location);
+        previous_unused_location = location;
+        used_locations.set(location);
+        const Id id{DefineInput(*this, F32[4], true)};
+        // Narrow to u32, as done for the generic inputs, so the operand is a 32-bit literal
+        Decorate(id, spv::Decoration::Location, static_cast<u32>(location));
+        input_front_color = id;
+    }
+    for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) {
+        // Each fixed function texture coordinate spans four consecutive attributes (S, T, R, Q)
+        if (loads.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) {
+            const size_t location =
+                FindNextUnusedLocation(used_locations, previous_unused_location);
+            previous_unused_location = location;
+            used_locations.set(location);
+            const Id id{DefineInput(*this, F32[4], true)};
+            Decorate(id, spv::Decoration::Location, static_cast<u32>(location));
+            input_fixed_fnc_textures[index] = id;
+        }
+    }
    if (stage == Stage::TessellationEval) {
        for (size_t index = 0; index < info.uses_patches.size(); ++index) {
            if (!info.uses_patches[index]) {
@@ -1313,9 +1347,31 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
        viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt,
                                     spv::BuiltIn::ViewportMaskNV);
    }
+    std::bitset<IR::NUM_GENERICS> used_locations{};
    for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
        if (info.stores.Generic(index)) {
            DefineGenericOutput(*this, index, invocations);
+            used_locations.set(index);
+        }
+    }
+    // Legacy (fixed function) varyings have no generic index of their own, so each one that is
+    // stored is given the next location that is not already marked in used_locations.
+    size_t previous_unused_location = 0;
+    if (info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
+        const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location);
+        previous_unused_location = location;
+        used_locations.set(location);
+        const Id id{DefineOutput(*this, F32[4], invocations)};
+        Decorate(id, spv::Decoration::Location, static_cast<u32>(location));
+        output_front_color = id;
+    }
+    for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) {
+        // Each fixed function texture coordinate spans four consecutive attributes (S, T, R, Q)
+        if (info.stores.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) {
+            const size_t location =
+                FindNextUnusedLocation(used_locations, previous_unused_location);
+            previous_unused_location = location;
+            used_locations.set(location);
+            const Id id{DefineOutput(*this, F32[4], invocations)};
+            // Narrow to u32 like the front color output so the operand is a 32-bit literal
+            Decorate(id, spv::Decoration::Location, static_cast<u32>(location));
+            output_fixed_fnc_textures[index] = id;
        }
    }
    switch (stage) {
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -268,10 +268,14 @@ public:
    Id write_global_func_u32x4{};

    Id input_position{};
+    Id input_front_color{};
+    std::array<Id, 10> input_fixed_fnc_textures{};
    std::array<Id, 32> input_generics{};

    Id output_point_size{};
    Id output_position{};
+    Id output_front_color{};
+    std::array<Id, 10> output_fixed_fnc_textures{};
    std::array<std::array<GenericElementInfo, 4>, 32> output_generics{};

    Id output_tess_level_outer{};
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -43,6 +43,25 @@ Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&...
    }
 }

+// True when attribute lies in the inclusive range FixedFncTexture0S..FixedFncTexture9Q.
+bool IsFixedFncTexture(IR::Attribute attribute) {
+    const bool below_range{attribute < IR::Attribute::FixedFncTexture0S};
+    const bool above_range{attribute > IR::Attribute::FixedFncTexture9Q};
+    return !below_range && !above_range;
+}
+
+// Maps a FixedFncTexture attribute to its vec4 slot: four consecutive attributes share one index.
+// Throws InvalidArgument for any attribute outside of the FixedFncTexture range.
+u32 FixedFncTextureAttributeIndex(IR::Attribute attribute) {
+    if (IsFixedFncTexture(attribute)) {
+        const u32 first{static_cast<u32>(IR::Attribute::FixedFncTexture0S)};
+        const u32 offset{static_cast<u32>(attribute) - first};
+        return offset / 4u;
+    }
+    throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute);
+}
+
+// Returns which of the four components of its vec4 slot a FixedFncTexture attribute refers to.
+// Throws InvalidArgument for any attribute outside of the FixedFncTexture range.
+u32 FixedFncTextureAttributeElement(IR::Attribute attribute) {
+    if (!IsFixedFncTexture(attribute)) {
+        throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute);
+    }
+    // Measure from the first FixedFncTexture attribute, as FixedFncTextureAttributeIndex does, so
+    // the result does not rely on FixedFncTexture0S having a value that is a multiple of four.
+    return (static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) % 4u;
+}
+
 template <typename... Args>
 Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) {
    if (ctx.stage == Stage::TessellationControl) {
@@ -74,6 +93,13 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
            return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id);
        }
    }
+    if (IsFixedFncTexture(attr)) {
+        const u32 index{FixedFncTextureAttributeIndex(attr)};
+        const u32 element{FixedFncTextureAttributeElement(attr)};
+        const Id element_id{ctx.Const(element)};
+        return OutputAccessChain(ctx, ctx.output_f32, ctx.output_fixed_fnc_textures[index],
+                                 element_id);
+    }
    switch (attr) {
    case IR::Attribute::PointSize:
        return ctx.output_point_size;
@@ -85,6 +111,14 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
        const Id element_id{ctx.Const(element)};
        return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id);
    }
+    case IR::Attribute::ColorFrontDiffuseR:
+    case IR::Attribute::ColorFrontDiffuseG:
+    case IR::Attribute::ColorFrontDiffuseB:
+    case IR::Attribute::ColorFrontDiffuseA: {
+        const u32 element{static_cast<u32>(attr) % 4};
+        const Id element_id{ctx.Const(element)};
+        return OutputAccessChain(ctx, ctx.output_f32, ctx.output_front_color, element_id);
+    }
    case IR::Attribute::ClipDistance0:
    case IR::Attribute::ClipDistance1:
    case IR::Attribute::ClipDistance2:
@@ -298,19 +332,21 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
    if (IR::IsGeneric(attr)) {
        const u32 index{IR::GenericAttributeIndex(attr)};
        const std::optional<AttrInfo> type{AttrTypes(ctx, index)};
-        if (!type) {
-            // Attribute is disabled
+        if (!type || !ctx.runtime_info.previous_stage_stores.Generic(index, element)) {
+            // Attribute is disabled or varying component is not written
            return ctx.Const(element == 3 ? 1.0f : 0.0f);
        }
-        if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) {
-            // Varying component is not written
-            return ctx.Const(type && element == 3 ? 1.0f : 0.0f);
-        }
        const Id generic_id{ctx.input_generics.at(index)};
        const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))};
        const Id value{ctx.OpLoad(type->id, pointer)};
        return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value;
    }
+    if (IsFixedFncTexture(attr)) {
+        const u32 index{FixedFncTextureAttributeIndex(attr)};
+        const Id attr_id{ctx.input_fixed_fnc_textures[index]};
+        const Id attr_ptr{AttrPointer(ctx, ctx.input_f32, vertex, attr_id, ctx.Const(element))};
+        return ctx.OpLoad(ctx.F32[1], attr_ptr);
+    }
    switch (attr) {
    case IR::Attribute::PrimitiveId:
        return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id));
@@ -320,6 +356,13 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
    case IR::Attribute::PositionW:
        return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position,
                                                  ctx.Const(element)));
+    case IR::Attribute::ColorFrontDiffuseR:
+    case IR::Attribute::ColorFrontDiffuseG:
+    case IR::Attribute::ColorFrontDiffuseB:
+    case IR::Attribute::ColorFrontDiffuseA: {
+        return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_front_color,
+                                                  ctx.Const(element)));
+    }
    case IR::Attribute::InstanceId:
        if (ctx.profile.support_vertex_instance_id) {
            return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));
@@ -337,8 +380,9 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
            return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
        }
    case IR::Attribute::FrontFace:
-        return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face),
-                            ctx.Const(std::numeric_limits<u32>::max()), ctx.u32_zero_value);
+        return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face),
+                            ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())),
+                            ctx.f32_zero_value);
    case IR::Attribute::PointSpriteS:
        return ctx.OpLoad(ctx.F32[1],
                          ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value));
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -20,6 +20,7 @@
 #include "shader_recompiler/frontend/maxwell/decode.h"
 #include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
 #include "shader_recompiler/frontend/maxwell/translate/translate.h"
+#include "shader_recompiler/host_translate_info.h"
 #include "shader_recompiler/object_pool.h"

 namespace Shader::Maxwell {
@@ -652,7 +653,7 @@ class TranslatePass {
 public:
    TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
                  ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt,
-                  IR::AbstractSyntaxList& syntax_list_)
+                  IR::AbstractSyntaxList& syntax_list_, const HostTranslateInfo& host_info)
        : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_},
          syntax_list{syntax_list_} {
        Visit(root_stmt, nullptr, nullptr);
@@ -660,6 +661,9 @@ public:
        IR::Block& first_block{*syntax_list.front().data.block};
        IR::IREmitter ir(first_block, first_block.begin());
        ir.Prologue();
+        if (uses_demote_to_helper && host_info.needs_demote_reorder) {
+            DemoteCombinationPass();
+        }
    }

 private:
@@ -809,7 +813,14 @@ private:
            }
            case StatementType::Return: {
                ensure_block();
-                IR::IREmitter{*current_block}.Epilogue();
+                IR::Block* return_block{block_pool.Create(inst_pool)};
+                IR::IREmitter{*return_block}.Epilogue();
+                current_block->AddBranch(return_block);
+
+                auto& merge{syntax_list.emplace_back()};
+                merge.type = IR::AbstractSyntaxNode::Type::Block;
+                merge.data.block = return_block;
+
                current_block = nullptr;
                syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
                break;
@@ -824,6 +835,7 @@ private:
                auto& merge{syntax_list.emplace_back()};
                merge.type = IR::AbstractSyntaxNode::Type::Block;
                merge.data.block = demote_block;
+                uses_demote_to_helper = true;
                break;
            }
            case StatementType::Unreachable: {
@@ -855,11 +867,117 @@ private:
        return block_pool.Create(inst_pool);
    }

+    /// Combines the conditional demotes found outside of nested branches into a single demote
+    /// branch that is inserted right before the last block holding an Epilogue.
+    /// Does nothing when no such demote exists or when more than one Epilogue was counted.
+    /// NOTE(review): when a demote block turns out not to be the body of an If node the pass
+    /// returns early, and demotes erased by earlier iterations are not put back - confirm that
+    /// this is intended.
+    void DemoteCombinationPass() {
+        using Type = IR::AbstractSyntaxNode::Type;
+        std::vector<IR::Block*> demote_blocks;
+        std::vector<IR::U1> demote_conds;
+        u32 num_epilogues{};
+        u32 branch_depth{};
+        // Collect the blocks that demote and count the epilogues seen along the way
+        for (const IR::AbstractSyntaxNode& node : syntax_list) {
+            if (node.type == Type::If) {
+                ++branch_depth;
+            }
+            if (node.type == Type::EndIf) {
+                --branch_depth;
+            }
+            if (node.type != Type::Block) {
+                continue;
+            }
+            if (branch_depth > 1) {
+                // Skip reordering nested demote branches.
+                continue;
+            }
+            for (const IR::Inst& inst : node.data.block->Instructions()) {
+                const IR::Opcode op{inst.GetOpcode()};
+                if (op == IR::Opcode::DemoteToHelperInvocation) {
+                    demote_blocks.push_back(node.data.block);
+                    break;
+                }
+                if (op == IR::Opcode::Epilogue) {
+                    ++num_epilogues;
+                }
+            }
+        }
+        if (demote_blocks.empty()) {
+            return;
+        }
+        if (num_epilogues > 1) {
+            LOG_DEBUG(Shader, "Combining demotes with more than one return is not implemented.");
+            return;
+        }
+        // Remove the If / Block / EndIf nodes of every collected demote, remembering its condition.
+        // The search for the next If resumes where the previous one was found.
+        s64 last_iterator_offset{};
+        auto& asl{syntax_list};
+        for (const IR::Block* demote_block : demote_blocks) {
+            const auto start_it{asl.begin() + last_iterator_offset};
+            auto asl_it{std::find_if(start_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
+                return asn.type == Type::If && asn.data.if_node.body == demote_block;
+            })};
+            if (asl_it == asl.end()) {
+                // Demote without a conditional branch.
+                // No need to proceed since all fragment instances will be demoted regardless.
+                return;
+            }
+            const IR::Block* const end_if = asl_it->data.if_node.merge;
+            demote_conds.push_back(asl_it->data.if_node.cond);
+            last_iterator_offset = std::distance(asl.begin(), asl_it);
+
+            asl_it = asl.erase(asl_it);
+            asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
+                return asn.type == Type::Block && asn.data.block == demote_block;
+            });
+
+            asl_it = asl.erase(asl_it);
+            asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
+                return asn.type == Type::EndIf && asn.data.end_if.merge == end_if;
+            });
+            asl_it = asl.erase(asl_it);
+        }
+        // Matches Block nodes whose block contains an Epilogue instruction
+        const auto epilogue_func{[](const IR::AbstractSyntaxNode& asn) {
+            if (asn.type != Type::Block) {
+                return false;
+            }
+            for (const auto& inst : asn.data.block->Instructions()) {
+                if (inst.GetOpcode() == IR::Opcode::Epilogue) {
+                    return true;
+                }
+            }
+            return false;
+        }};
+        // Search backwards so the last matching node is picked, then convert the reverse iterator
+        // into a forward iterator that designates that same node
+        const auto reverse_it{std::find_if(asl.rbegin(), asl.rend(), epilogue_func)};
+        const auto return_block_it{(reverse_it + 1).base()};
+
+        // The combined condition is emitted into the block of the node preceding the return block.
+        // NOTE(review): this assumes that node is a Block node - confirm.
+        IR::IREmitter ir{*(return_block_it - 1)->data.block};
+        IR::U1 cond(IR::Value(false));
+        for (const auto& demote_cond : demote_conds) {
+            cond = ir.LogicalOr(cond, demote_cond);
+        }
+        // NOTE(review): presumably accounts for the use by the If node built below - confirm
+        cond.Inst()->DestructiveAddUsage(1);
+
+        // Only the first demote block is kept; it becomes the body of the combined branch
+        IR::AbstractSyntaxNode demote_if_node{};
+        demote_if_node.type = Type::If;
+        demote_if_node.data.if_node.cond = cond;
+        demote_if_node.data.if_node.body = demote_blocks[0];
+        demote_if_node.data.if_node.merge = return_block_it->data.block;
+
+        IR::AbstractSyntaxNode demote_node{};
+        demote_node.type = Type::Block;
+        demote_node.data.block = demote_blocks[0];
+
+        IR::AbstractSyntaxNode demote_endif_node{};
+        demote_endif_node.type = Type::EndIf;
+        demote_endif_node.data.end_if.merge = return_block_it->data.block;
+
+        // Insert If, Block and EndIf with a single call: an insertion invalidates return_block_it,
+        // so the iterator must not be reused for a second or third insert.
+        asl.insert(return_block_it, {demote_if_node, demote_node, demote_endif_node});
+    }
+
    ObjectPool<Statement>& stmt_pool;
    ObjectPool<IR::Inst>& inst_pool;
    ObjectPool<IR::Block>& block_pool;
    Environment& env;
    IR::AbstractSyntaxList& syntax_list;
+    bool uses_demote_to_helper{};

 // TODO: C++20 Remove this when all compilers support constexpr std::vector
 #if __cpp_lib_constexpr_vector >= 201907
@@ -871,12 +989,13 @@ private:
 } // Anonymous namespace

 IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
-                                Environment& env, Flow::CFG& cfg) {
+                                Environment& env, Flow::CFG& cfg,
+                                const HostTranslateInfo& host_info) { // host_info is forwarded to TranslatePass
    ObjectPool<Statement> stmt_pool{64};
    GotoPass goto_pass{cfg, stmt_pool};
    Statement& root{goto_pass.RootStatement()};
    IR::AbstractSyntaxList syntax_list;
-    TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list};
+    TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list, host_info}; // the temporary's constructor fills syntax_list
    return syntax_list;
 }

--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
@@ -11,10 +11,13 @@
 #include "shader_recompiler/frontend/maxwell/control_flow.h"
 #include "shader_recompiler/object_pool.h"

-namespace Shader::Maxwell {
+namespace Shader {
+struct HostTranslateInfo;
+namespace Maxwell {

 [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
                                              ObjectPool<IR::Block>& block_pool, Environment& env,
-                                              Flow::CFG& cfg);
+                                              Flow::CFG& cfg, const HostTranslateInfo& host_info);

-} // namespace Shader::Maxwell
+} // namespace Maxwell
+} // namespace Shader
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -130,7 +130,7 @@ void AddNVNStorageBuffers(IR::Program& program) {
 IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
                             Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
    IR::Program program;
-    program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg);
+    program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info);
    program.blocks = GenerateBlocks(program.syntax_list);
    program.post_order_blocks = PostOrder(program.syntax_list.front());
    program.stage = env.ShaderStage();
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -11,8 +11,9 @@ namespace Shader {

 /// Misc information about the host
 struct HostTranslateInfo {
-    bool support_float16{}; ///< True when the device supports 16-bit floats
-    bool support_int64{};   ///< True when the device supports 64-bit integers
+    bool support_float16{};      ///< True when the device supports 16-bit floats
+    bool support_int64{};        ///< True when the device supports 64-bit integers
+    bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
 };

 } // namespace Shader
--- a/src/tests/common/param_package.cpp
+++ b/src/tests/common/param_package.cpp
@@ -4,11 +4,13 @@

 #include <catch2/catch.hpp>
 #include <math.h>
+#include "common/logging/backend.h"
 #include "common/param_package.h"

 namespace Common {

 TEST_CASE("ParamPackage", "[common]") {
+    Common::Log::DisableLoggingInTests();
    ParamPackage original{
        {"abc", "xyz"},
        {"def", "42"},
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -97,6 +97,7 @@ add_library(video_core STATIC
    renderer_opengl/gl_stream_buffer.h
    renderer_opengl/gl_texture_cache.cpp
    renderer_opengl/gl_texture_cache.h
+    renderer_opengl/gl_texture_cache_base.cpp
    renderer_opengl/gl_query_cache.cpp
    renderer_opengl/gl_query_cache.h
    renderer_opengl/maxwell_to_gl.h
@@ -155,6 +156,7 @@ add_library(video_core STATIC
    renderer_vulkan/vk_swapchain.h
    renderer_vulkan/vk_texture_cache.cpp
    renderer_vulkan/vk_texture_cache.h
+    renderer_vulkan/vk_texture_cache_base.cpp
    renderer_vulkan/vk_update_descriptor.cpp
    renderer_vulkan/vk_update_descriptor.h
    shader_cache.cpp
@@ -186,6 +188,7 @@ add_library(video_core STATIC
    texture_cache/samples_helper.h
    texture_cache/slot_vector.h
    texture_cache/texture_cache.h
+    texture_cache/texture_cache_base.h
    texture_cache/types.h
    texture_cache/util.cpp
    texture_cache/util.h
@@ -228,6 +231,7 @@ endif()

 target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR})
 target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES})
+target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS})

 add_dependencies(video_core host_shaders)
 target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -261,16 +261,6 @@ public:
        stream_score += score;
    }

-    /// Sets the new frame tick
-    void SetFrameTick(u64 new_frame_tick) noexcept {
-        frame_tick = new_frame_tick;
-    }
-
-    /// Returns the new frame tick
-    [[nodiscard]] u64 FrameTick() const noexcept {
-        return frame_tick;
-    }
-
    /// Returns the likeliness of this being a stream buffer
    [[nodiscard]] int StreamScore() const noexcept {
        return stream_score;
@@ -307,6 +297,14 @@ public:
        return words.size_bytes;
    }

+    /// Returns the id last stored through setLRUID (SIZE_MAX when none has been stored)
+    [[nodiscard]] size_t getLRUID() const noexcept {
+        return lru_id;
+    }
+
+    /// Stores the id under which the owning cache tracks this buffer in its LRU cache
+    void setLRUID(size_t lru_id_) noexcept {
+        lru_id = lru_id_;
+    }
+
 private:
    template <Type type>
    u64* Array() noexcept {
@@ -603,9 +601,9 @@ private:
    RasterizerInterface* rasterizer = nullptr;
    VAddr cpu_addr = 0;
    Words words;
-    u64 frame_tick = 0;
    BufferFlagBits flags{};
    int stream_score = 0;
+    size_t lru_id = SIZE_MAX;
 };

 } // namespace VideoCommon
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -20,6 +20,7 @@
 #include "common/common_types.h"
 #include "common/div_ceil.h"
 #include "common/literals.h"
+#include "common/lru_cache.h"
 #include "common/microprofile.h"
 #include "common/scope_exit.h"
 #include "common/settings.h"
@@ -330,7 +331,7 @@ private:
    template <bool insert>
    void ChangeRegister(BufferId buffer_id);

-    void TouchBuffer(Buffer& buffer) const noexcept;
+    void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept;

    bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);

@@ -428,7 +429,11 @@ private:
    size_t immediate_buffer_capacity = 0;
    std::unique_ptr<u8[]> immediate_buffer_alloc;

-    typename SlotVector<Buffer>::Iterator deletion_iterator;
+    struct LRUItemParams { // Type parameters handed to Common::LeastRecentlyUsedCache
+        using ObjectType = BufferId; // what lru_cache.Insert is given for each entry
+        using TickType = u64; // same type as frame_tick
+    };
+    Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; // Registered buffers, touched with the current frame_tick on use
    u64 frame_tick = 0;
    u64 total_used_memory = 0;

@@ -445,7 +450,6 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
    // Ensure the first slot is used for the null buffer
    void(slot_buffers.insert(runtime, NullBufferParams{}));
-    deletion_iterator = slot_buffers.end();
    common_ranges.clear();
 }

@@ -454,20 +458,17 @@ void BufferCache<P>::RunGarbageCollector() {
    const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY;
    const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
    int num_iterations = aggressive_gc ? 64 : 32;
-    for (; num_iterations > 0; --num_iterations) {
-        if (deletion_iterator == slot_buffers.end()) {
-            deletion_iterator = slot_buffers.begin();
+    const auto clean_up = [this, &num_iterations](BufferId buffer_id) {
+        if (num_iterations == 0) {
+            return true;
        }
-        ++deletion_iterator;
-        if (deletion_iterator == slot_buffers.end()) {
-            break;
-        }
-        const auto [buffer_id, buffer] = *deletion_iterator;
-        if (buffer->FrameTick() + ticks_to_destroy < frame_tick) {
-            DownloadBufferMemory(*buffer);
-            DeleteBuffer(buffer_id);
-        }
-    }
+        --num_iterations;
+        auto& buffer = slot_buffers[buffer_id];
+        DownloadBufferMemory(buffer);
+        DeleteBuffer(buffer_id);
+        return false;
+    };
+    lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
 }

 template <class P>
@@ -485,7 +486,7 @@ void BufferCache<P>::TickFrame() {
    const bool skip_preferred = hits * 256 < shots * 251;
    uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;

-    if (Settings::values.use_caches_gc.GetValue() && total_used_memory >= EXPECTED_MEMORY) {
+    if (total_used_memory >= EXPECTED_MEMORY) {
        RunGarbageCollector();
    }
    ++frame_tick;
@@ -954,7 +955,7 @@ bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {
 template <class P>
 void BufferCache<P>::BindHostIndexBuffer() {
    Buffer& buffer = slot_buffers[index_buffer.buffer_id];
-    TouchBuffer(buffer);
+    TouchBuffer(buffer, index_buffer.buffer_id);
    const u32 offset = buffer.Offset(index_buffer.cpu_addr);
    const u32 size = index_buffer.size;
    SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
@@ -975,7 +976,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
    for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
        const Binding& binding = vertex_buffers[index];
        Buffer& buffer = slot_buffers[binding.buffer_id];
-        TouchBuffer(buffer);
+        TouchBuffer(buffer, binding.buffer_id);
        SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
        if (!flags[Dirty::VertexBuffer0 + index]) {
            continue;
@@ -1011,7 +1012,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
    const VAddr cpu_addr = binding.cpu_addr;
    const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]);
    Buffer& buffer = slot_buffers[binding.buffer_id];
-    TouchBuffer(buffer);
+    TouchBuffer(buffer, binding.buffer_id);
    const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
                                 size <= uniform_buffer_skip_cache_size &&
                                 !buffer.IsRegionGpuModified(cpu_addr, size);
@@ -1083,7 +1084,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
    ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
        const Binding& binding = storage_buffers[stage][index];
        Buffer& buffer = slot_buffers[binding.buffer_id];
-        TouchBuffer(buffer);
+        TouchBuffer(buffer, binding.buffer_id);
        const u32 size = binding.size;
        SynchronizeBuffer(buffer, binding.cpu_addr, size);

@@ -1128,7 +1129,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
    for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
        const Binding& binding = transform_feedback_buffers[index];
        Buffer& buffer = slot_buffers[binding.buffer_id];
-        TouchBuffer(buffer);
+        TouchBuffer(buffer, binding.buffer_id);
        const u32 size = binding.size;
        SynchronizeBuffer(buffer, binding.cpu_addr, size);

@@ -1148,7 +1149,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
    ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
        const Binding& binding = compute_uniform_buffers[index];
        Buffer& buffer = slot_buffers[binding.buffer_id];
-        TouchBuffer(buffer);
+        TouchBuffer(buffer, binding.buffer_id);
        const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]);
        SynchronizeBuffer(buffer, binding.cpu_addr, size);

@@ -1168,7 +1169,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
    ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
        const Binding& binding = compute_storage_buffers[index];
        Buffer& buffer = slot_buffers[binding.buffer_id];
-        TouchBuffer(buffer);
+        TouchBuffer(buffer, binding.buffer_id);
        const u32 size = binding.size;
        SynchronizeBuffer(buffer, binding.cpu_addr, size);

@@ -1513,11 +1514,11 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
    const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
    const u32 size = static_cast<u32>(overlap.end - overlap.begin);
    const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
-    TouchBuffer(slot_buffers[new_buffer_id]);
    for (const BufferId overlap_id : overlap.ids) {
        JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
    }
    Register(new_buffer_id);
+    TouchBuffer(slot_buffers[new_buffer_id], new_buffer_id);
    return new_buffer_id;
 }

@@ -1534,12 +1535,14 @@ void BufferCache<P>::Unregister(BufferId buffer_id) {
 template <class P>
 template <bool insert>
 void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
-    const Buffer& buffer = slot_buffers[buffer_id];
+    Buffer& buffer = slot_buffers[buffer_id];
    const auto size = buffer.SizeBytes();
    if (insert) {
        total_used_memory += Common::AlignUp(size, 1024);
+        buffer.setLRUID(lru_cache.Insert(buffer_id, frame_tick));
    } else {
        total_used_memory -= Common::AlignUp(size, 1024);
+        lru_cache.Free(buffer.getLRUID());
    }
    const VAddr cpu_addr_begin = buffer.CpuAddr();
    const VAddr cpu_addr_end = cpu_addr_begin + size;
@@ -1555,8 +1558,10 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
 }

 template <class P>
-void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept {
-    buffer.SetFrameTick(frame_tick);
+void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { // Stamps the buffer's LRU entry with the current frame_tick
+    if (buffer_id != NULL_BUFFER_ID) { // NOTE(review): presumably the null buffer is never registered and so has no LRU entry - confirm
+        lru_cache.Touch(buffer.getLRUID(), frame_tick);
+    }
 }

 template <class P>
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -5,6 +5,7 @@
 #include <fstream>
 #include <vector>
 #include "common/assert.h"
+#include "common/settings.h"
 #include "video_core/command_classes/codecs/codec.h"
 #include "video_core/command_classes/codecs/h264.h"
 #include "video_core/command_classes/codecs/vp9.h"
@@ -16,44 +17,28 @@ extern "C" {
 }

 namespace Tegra {
-#if defined(LIBVA_FOUND)
-// Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c originally under MIT license
 namespace {
-constexpr std::array<const char*, 2> VAAPI_DRIVERS = {
-    "i915",
-    "amdgpu",
-};
+constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12;
+constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P;

-AVPixelFormat GetHwFormat(AVCodecContext*, const AVPixelFormat* pix_fmts) {
+void AVPacketDeleter(AVPacket* ptr) {
+    av_packet_free(&ptr);
+}
+
+using AVPacketPtr = std::unique_ptr<AVPacket, decltype(&AVPacketDeleter)>;
+
+AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) {
    for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
-        if (*p == AV_PIX_FMT_VAAPI) {
-            return AV_PIX_FMT_VAAPI;
+        if (*p == av_codec_ctx->pix_fmt) {
+            return av_codec_ctx->pix_fmt;
        }
    }
    LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU");
-    return *pix_fmts;
-}
-
-bool CreateVaapiHwdevice(AVBufferRef** av_hw_device) {
-    AVDictionary* hwdevice_options = nullptr;
-    av_dict_set(&hwdevice_options, "connection_type", "drm", 0);
-    for (const auto& driver : VAAPI_DRIVERS) {
-        av_dict_set(&hwdevice_options, "kernel_driver", driver, 0);
-        const int hwdevice_error = av_hwdevice_ctx_create(av_hw_device, AV_HWDEVICE_TYPE_VAAPI,
-                                                          nullptr, hwdevice_options, 0);
-        if (hwdevice_error >= 0) {
-            LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver);
-            av_dict_free(&hwdevice_options);
-            return true;
-        }
-        LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error);
-    }
-    LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers");
-    av_dict_free(&hwdevice_options);
-    return false;
+    av_buffer_unref(&av_codec_ctx->hw_device_ctx);
+    av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT;
+    return PREFERRED_CPU_FMT;
 }
 } // namespace
-#endif

 void AVFrameDeleter(AVFrame* ptr) {
    av_frame_free(&ptr);
@@ -68,56 +53,110 @@ Codec::~Codec() {
        return;
    }
    // Free libav memory
-    avcodec_send_packet(av_codec_ctx, nullptr);
-    AVFrame* av_frame = av_frame_alloc();
-    avcodec_receive_frame(av_codec_ctx, av_frame);
-    avcodec_flush_buffers(av_codec_ctx);
-    av_frame_free(&av_frame);
-    avcodec_close(av_codec_ctx);
-    av_buffer_unref(&av_hw_device);
+    avcodec_free_context(&av_codec_ctx);
+    av_buffer_unref(&av_gpu_decoder);
 }

-void Codec::InitializeHwdec() {
-    // Prioritize integrated GPU to mitigate bandwidth bottlenecks
+bool Codec::CreateGpuAvDevice() {
 #if defined(LIBVA_FOUND)
-    if (CreateVaapiHwdevice(&av_hw_device)) {
-        const auto hw_device_ctx = av_buffer_ref(av_hw_device);
-        ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
-        av_codec_ctx->hw_device_ctx = hw_device_ctx;
-        av_codec_ctx->get_format = GetHwFormat;
+    static constexpr std::array<const char*, 3> VAAPI_DRIVERS = {
+        "i915",
+        "iHD",
+        "amdgpu",
+    };
+    AVDictionary* hwdevice_options = nullptr;
+    av_dict_set(&hwdevice_options, "connection_type", "drm", 0);
+    for (const auto& driver : VAAPI_DRIVERS) {
+        av_dict_set(&hwdevice_options, "kernel_driver", driver, 0);
+        const int hwdevice_error = av_hwdevice_ctx_create(&av_gpu_decoder, AV_HWDEVICE_TYPE_VAAPI,
+                                                          nullptr, hwdevice_options, 0);
+        if (hwdevice_error >= 0) {
+            LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver);
+            av_dict_free(&hwdevice_options);
+            av_codec_ctx->pix_fmt = AV_PIX_FMT_VAAPI;
+            return true;
+        }
+        LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error);
+    }
+    LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers");
+    av_dict_free(&hwdevice_options);
+#endif
+    static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX;
+    static constexpr std::array GPU_DECODER_TYPES{
+        AV_HWDEVICE_TYPE_CUDA,
+#ifdef _WIN32
+        AV_HWDEVICE_TYPE_D3D11VA,
+#else
+        AV_HWDEVICE_TYPE_VDPAU,
+#endif
+    };
+    for (const auto& type : GPU_DECODER_TYPES) {
+        const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0);
+        if (hwdevice_res < 0) {
+            LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}",
+                      av_hwdevice_get_type_name(type), hwdevice_res);
+            continue;
+        }
+        for (int i = 0;; i++) {
+            const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i);
+            if (!config) {
+                LOG_DEBUG(Service_NVDRV, "{} decoder does not support device type {}.",
+                          av_codec->name, av_hwdevice_get_type_name(type));
+                break;
+            }
+            if (config->methods & HW_CONFIG_METHOD && config->device_type == type) {
+                av_codec_ctx->pix_fmt = config->pix_fmt;
+                LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+void Codec::InitializeAvCodecContext() {
+    av_codec_ctx = avcodec_alloc_context3(av_codec);
+    av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
+}
+
+void Codec::InitializeGpuDecoder() {
+    if (!CreateGpuAvDevice()) {
+        av_buffer_unref(&av_gpu_decoder);
        return;
    }
-#endif
-    // TODO more GPU accelerated decoders
+    auto* hw_device_ctx = av_buffer_ref(av_gpu_decoder);
+    ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
+    av_codec_ctx->hw_device_ctx = hw_device_ctx;
+    av_codec_ctx->get_format = GetGpuFormat;
 }

 void Codec::Initialize() {
-    AVCodecID codec;
-    switch (current_codec) {
-    case NvdecCommon::VideoCodec::H264:
-        codec = AV_CODEC_ID_H264;
-        break;
-    case NvdecCommon::VideoCodec::Vp9:
-        codec = AV_CODEC_ID_VP9;
-        break;
-    default:
-        UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
+    const AVCodecID codec = [&] {
+        switch (current_codec) {
+        case NvdecCommon::VideoCodec::H264:
+            return AV_CODEC_ID_H264;
+        case NvdecCommon::VideoCodec::Vp9:
+            return AV_CODEC_ID_VP9;
+        default:
+            UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
+            return AV_CODEC_ID_NONE;
+        }
+    }();
+    av_codec = avcodec_find_decoder(codec);
+
+    InitializeAvCodecContext();
+    if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::GPU) {
+        InitializeGpuDecoder();
+    }
+    if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) {
+        LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res);
+        avcodec_free_context(&av_codec_ctx);
+        av_buffer_unref(&av_gpu_decoder);
        return;
    }
-    av_codec = avcodec_find_decoder(codec);
-    av_codec_ctx = avcodec_alloc_context3(av_codec);
-    av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
-    InitializeHwdec();
    if (!av_codec_ctx->hw_device_ctx) {
        LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding");
    }
-    const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
-    if (av_error < 0) {
-        LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
-        avcodec_close(av_codec_ctx);
-        av_buffer_unref(&av_hw_device);
-        return;
-    }
    initialized = true;
 }

@@ -133,6 +172,9 @@ void Codec::Decode() {
    if (is_first_frame) {
        Initialize();
    }
+    if (!initialized) {
+        return;
+    }
    bool vp9_hidden_frame = false;
    std::vector<u8> frame_data;
    if (current_codec == NvdecCommon::VideoCodec::H264) {
@@ -141,50 +183,48 @@ void Codec::Decode() {
        frame_data = vp9_decoder->ComposeFrameHeader(state);
        vp9_hidden_frame = vp9_decoder->WasFrameHidden();
    }
-    AVPacket packet{};
-    av_init_packet(&packet);
-    packet.data = frame_data.data();
-    packet.size = static_cast<s32>(frame_data.size());
-    if (const int ret = avcodec_send_packet(av_codec_ctx, &packet); ret) {
-        LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", ret);
+    AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter};
+    if (!packet) {
+        LOG_ERROR(Service_NVDRV, "av_packet_alloc failed");
+        return;
+    }
+    packet->data = frame_data.data();
+    packet->size = static_cast<s32>(frame_data.size());
+    if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) {
+        LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res);
        return;
    }
    // Only receive/store visible frames
    if (vp9_hidden_frame) {
        return;
    }
-    AVFrame* hw_frame = av_frame_alloc();
-    AVFrame* sw_frame = hw_frame;
-    ASSERT_MSG(hw_frame, "av_frame_alloc hw_frame failed");
-    if (const int ret = avcodec_receive_frame(av_codec_ctx, hw_frame); ret) {
+    AVFramePtr initial_frame{av_frame_alloc(), AVFrameDeleter};
+    AVFramePtr final_frame{nullptr, AVFrameDeleter};
+    ASSERT_MSG(initial_frame, "av_frame_alloc initial_frame failed");
+    if (const int ret = avcodec_receive_frame(av_codec_ctx, initial_frame.get()); ret) {
        LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret);
-        av_frame_free(&hw_frame);
        return;
    }
-    if (!hw_frame->width || !hw_frame->height) {
+    if (initial_frame->width == 0 || initial_frame->height == 0) {
        LOG_WARNING(Service_NVDRV, "Zero width or height in frame");
-        av_frame_free(&hw_frame);
        return;
    }
-#if defined(LIBVA_FOUND)
-    // Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c under MIT license
-    if (hw_frame->format == AV_PIX_FMT_VAAPI) {
-        sw_frame = av_frame_alloc();
-        ASSERT_MSG(sw_frame, "av_frame_alloc sw_frame failed");
+    if (av_codec_ctx->hw_device_ctx) {
+        final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
+        ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed");
        // Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp
        // because Intel drivers crash unless using AV_PIX_FMT_NV12
-        sw_frame->format = AV_PIX_FMT_NV12;
-        const int transfer_data_ret = av_hwframe_transfer_data(sw_frame, hw_frame, 0);
-        ASSERT_MSG(!transfer_data_ret, "av_hwframe_transfer_data error {}", transfer_data_ret);
-        av_frame_free(&hw_frame);
+        final_frame->format = PREFERRED_GPU_FMT;
+        const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0);
+        ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret);
+    } else {
+        final_frame = std::move(initial_frame);
    }
-#endif
-    if (sw_frame->format != AV_PIX_FMT_YUV420P && sw_frame->format != AV_PIX_FMT_NV12) {
-        UNIMPLEMENTED_MSG("Unexpected video format from host graphics: {}", sw_frame->format);
-        av_frame_free(&sw_frame);
+    if (final_frame->format != PREFERRED_CPU_FMT && final_frame->format != PREFERRED_GPU_FMT) {
+        UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format);
        return;
    }
-    av_frames.push(AVFramePtr{sw_frame, AVFrameDeleter});
+    av_frames.push(std::move(final_frame));
    if (av_frames.size() > 10) {
        LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame");
        av_frames.pop();
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -50,18 +50,23 @@ public:

    /// Returns the value of current_codec
    [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
+
    /// Return name of the current codec
    [[nodiscard]] std::string_view GetCurrentCodecName() const;

 private:
-    void InitializeHwdec();
+    void InitializeAvCodecContext();
+
+    void InitializeGpuDecoder();
+
+    bool CreateGpuAvDevice();

    bool initialized{};
    NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None};

    AVCodec* av_codec{nullptr};
-    AVBufferRef* av_hw_device{nullptr};
    AVCodecContext* av_codec_ctx{nullptr};
+    AVBufferRef* av_gpu_decoder{nullptr};

    GPU& gpu;
    const NvdecCommon::NvdecRegisters& state;
--- a/src/video_core/command_classes/codecs/h264.cpp
+++ b/src/video_core/command_classes/codecs/h264.cpp
@@ -95,7 +95,8 @@ const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegister
    const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units /
                           (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);

-    writer.WriteUe(16);
+    // TODO (ameerj): Where do we get this number, it seems to be particular for each stream
+    writer.WriteUe(6); // Max number of reference frames
    writer.WriteBit(false);
    writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
    writer.WriteUe(pic_height - 1);
--- a/src/video_core/command_classes/codecs/vp9.cpp
+++ b/src/video_core/command_classes/codecs/vp9.cpp
@@ -397,14 +397,14 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state)
        next_frame = std::move(temp);
    } else {
        next_frame.info = current_frame.info;
-        next_frame.bit_stream = std::move(current_frame.bit_stream);
+        next_frame.bit_stream = current_frame.bit_stream;
    }
    return current_frame;
 }

 std::vector<u8> VP9::ComposeCompressedHeader() {
    VpxRangeEncoder writer{};
-    const bool update_probs = current_frame_info.show_frame && !current_frame_info.is_key_frame;
+    const bool update_probs = !current_frame_info.is_key_frame && current_frame_info.show_frame;
    if (!current_frame_info.lossless) {
        if (static_cast<u32>(current_frame_info.transform_mode) >= 3) {
            writer.Write(3, 2);
@@ -742,6 +742,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
    uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q);
    uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q);

+    ASSERT(!current_frame_info.segment_enabled);
    uncomp_writer.WriteBit(false); // Segmentation enabled (TODO).

    const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width);
--- a/src/video_core/command_classes/codecs/vp9_types.h
+++ b/src/video_core/command_classes/codecs/vp9_types.h
@@ -22,7 +22,7 @@ struct Vp9FrameDimensions {
 };
 static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size");

-enum FrameFlags : u32 {
+enum class FrameFlags : u32 {
    IsKeyFrame = 1 << 0,
    LastFrameIsKeyFrame = 1 << 1,
    FrameSizeChanged = 1 << 2,
@@ -30,6 +30,7 @@ enum FrameFlags : u32 {
    LastShowFrame = 1 << 4,
    IntraOnly = 1 << 5,
 };
+DECLARE_ENUM_FLAG_OPERATORS(FrameFlags)

 enum class TxSize {
    Tx4x4 = 0,   // 4x4 transform
@@ -92,44 +93,34 @@ struct Vp9EntropyProbs {
 static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size");

 struct Vp9PictureInfo {
-    bool is_key_frame;
-    bool intra_only;
-    bool last_frame_was_key;
-    bool frame_size_changed;
-    bool error_resilient_mode;
-    bool last_frame_shown;
-    bool show_frame;
+    u32 bitstream_size;
+    std::array<u64, 4> frame_offsets;
    std::array<s8, 4> ref_frame_sign_bias;
    s32 base_q_index;
    s32 y_dc_delta_q;
    s32 uv_dc_delta_q;
    s32 uv_ac_delta_q;
-    bool lossless;
    s32 transform_mode;
-    bool allow_high_precision_mv;
    s32 interp_filter;
    s32 reference_mode;
-    s8 comp_fixed_ref;
-    std::array<s8, 2> comp_var_ref;
    s32 log2_tile_cols;
    s32 log2_tile_rows;
-    bool segment_enabled;
-    bool segment_map_update;
-    bool segment_map_temporal_update;
-    s32 segment_abs_delta;
-    std::array<u32, 8> segment_feature_enable;
-    std::array<std::array<s16, 4>, 8> segment_feature_data;
-    bool mode_ref_delta_enabled;
-    bool use_prev_in_find_mv_refs;
    std::array<s8, 4> ref_deltas;
    std::array<s8, 2> mode_deltas;
    Vp9EntropyProbs entropy;
    Vp9FrameDimensions frame_size;
    u8 first_level;
    u8 sharpness_level;
-    u32 bitstream_size;
-    std::array<u64, 4> frame_offsets;
-    std::array<bool, 4> refresh_frame;
+    bool is_key_frame;
+    bool intra_only;
+    bool last_frame_was_key;
+    bool error_resilient_mode;
+    bool last_frame_shown;
+    bool show_frame;
+    bool lossless;
+    bool allow_high_precision_mv;
+    bool segment_enabled;
+    bool mode_ref_delta_enabled;
 };

 struct Vp9FrameContainer {
@@ -145,7 +136,7 @@ struct PictureInfo {
    Vp9FrameDimensions golden_frame_size;  ///< 0x50
    Vp9FrameDimensions alt_frame_size;     ///< 0x58
    Vp9FrameDimensions current_frame_size; ///< 0x60
-    u32 vp9_flags;                         ///< 0x68
+    FrameFlags vp9_flags;                  ///< 0x68
    std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C
    u8 first_level;                        ///< 0x70
    u8 sharpness_level;                    ///< 0x71
@@ -158,60 +149,43 @@ struct PictureInfo {
    u8 allow_high_precision_mv;            ///< 0x78
    u8 interp_filter;                      ///< 0x79
    u8 reference_mode;                     ///< 0x7A
-    s8 comp_fixed_ref;                     ///< 0x7B
-    std::array<s8, 2> comp_var_ref;        ///< 0x7C
+    INSERT_PADDING_BYTES_NOINIT(3);        ///< 0x7B
    u8 log2_tile_cols;                     ///< 0x7E
    u8 log2_tile_rows;                     ///< 0x7F
    Segmentation segmentation;             ///< 0x80
    LoopFilter loop_filter;                ///< 0xE4
-    INSERT_PADDING_BYTES_NOINIT(5);        ///< 0xEB
-    u32 surface_params;                    ///< 0xF0
-    INSERT_PADDING_WORDS_NOINIT(3);        ///< 0xF4
+    INSERT_PADDING_BYTES_NOINIT(21);       ///< 0xEB

    [[nodiscard]] Vp9PictureInfo Convert() const {
        return {
-            .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0,
-            .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0,
-            .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0,
-            .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0,
-            .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0,
-            .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0,
-            .show_frame = false,
+            .bitstream_size = bitstream_size,
+            .frame_offsets{},
            .ref_frame_sign_bias = ref_frame_sign_bias,
            .base_q_index = base_q_index,
            .y_dc_delta_q = y_dc_delta_q,
            .uv_dc_delta_q = uv_dc_delta_q,
            .uv_ac_delta_q = uv_ac_delta_q,
-            .lossless = lossless != 0,
            .transform_mode = tx_mode,
-            .allow_high_precision_mv = allow_high_precision_mv != 0,
            .interp_filter = interp_filter,
            .reference_mode = reference_mode,
-            .comp_fixed_ref = comp_fixed_ref,
-            .comp_var_ref = comp_var_ref,
            .log2_tile_cols = log2_tile_cols,
            .log2_tile_rows = log2_tile_rows,
-            .segment_enabled = segmentation.enabled != 0,
-            .segment_map_update = segmentation.update_map != 0,
-            .segment_map_temporal_update = segmentation.temporal_update != 0,
-            .segment_abs_delta = segmentation.abs_delta,
-            .segment_feature_enable = segmentation.feature_mask,
-            .segment_feature_data = segmentation.feature_data,
-            .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0,
-            .use_prev_in_find_mv_refs = !(vp9_flags == (FrameFlags::ErrorResilientMode)) &&
-                                        !(vp9_flags == (FrameFlags::FrameSizeChanged)) &&
-                                        !(vp9_flags == (FrameFlags::IntraOnly)) &&
-                                        (vp9_flags == (FrameFlags::LastShowFrame)) &&
-                                        !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)),
            .ref_deltas = loop_filter.ref_deltas,
            .mode_deltas = loop_filter.mode_deltas,
            .entropy{},
            .frame_size = current_frame_size,
            .first_level = first_level,
            .sharpness_level = sharpness_level,
-            .bitstream_size = bitstream_size,
-            .frame_offsets{},
-            .refresh_frame{},
+            .is_key_frame = True(vp9_flags & FrameFlags::IsKeyFrame),
+            .intra_only = True(vp9_flags & FrameFlags::IntraOnly),
+            .last_frame_was_key = True(vp9_flags & FrameFlags::LastFrameIsKeyFrame),
+            .error_resilient_mode = True(vp9_flags & FrameFlags::ErrorResilientMode),
+            .last_frame_shown = True(vp9_flags & FrameFlags::LastShowFrame),
+            .show_frame = true,
+            .lossless = lossless != 0,
+            .allow_high_precision_mv = allow_high_precision_mv != 0,
+            .segment_enabled = segmentation.enabled != 0,
+            .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0,
        };
    }
 };
@@ -316,7 +290,6 @@ ASSERT_POSITION(last_frame_size, 0x48);
 ASSERT_POSITION(first_level, 0x70);
 ASSERT_POSITION(segmentation, 0x80);
 ASSERT_POSITION(loop_filter, 0xE4);
-ASSERT_POSITION(surface_params, 0xF0);
 #undef ASSERT_POSITION

 #define ASSERT_POSITION(field_name, position)                                                      \
--- a/src/video_core/command_classes/nvdec.cpp
+++ b/src/video_core/command_classes/nvdec.cpp
@@ -39,7 +39,7 @@ void Nvdec::Execute() {
        codec->Decode();
        break;
    default:
-        UNIMPLEMENTED_MSG("Unknown codec {}", static_cast<u32>(codec->GetCurrentCodec()));
+        UNIMPLEMENTED_MSG("Codec {}", codec->GetCurrentCodecName());
        break;
    }
 }
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -96,12 +96,11 @@ void Vic::Execute() {
        if (!converted_frame_buffer) {
            converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(linear_size)), av_free};
        }
-
-        const int converted_stride{frame->width * 4};
+        const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0};
        u8* const converted_frame_buf_addr{converted_frame_buffer.get()};

        sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height,
-                  &converted_frame_buf_addr, &converted_stride);
+                  &converted_frame_buf_addr, converted_stride.data());

        const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
        if (blk_kind != 0) {
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -475,10 +475,10 @@ public:

                // These values are used by Nouveau and some games.
                AddGL = 0x8006,
-                SubtractGL = 0x8007,
-                ReverseSubtractGL = 0x8008,
-                MinGL = 0x800a,
-                MaxGL = 0x800b
+                MinGL = 0x8007,
+                MaxGL = 0x8008,
+                SubtractGL = 0x800a,
+                ReverseSubtractGL = 0x800b
            };

            enum class Factor : u32 {
--- a/src/video_core/macro/macro_jit_x64.h
+++ b/src/video_core/macro/macro_jit_x64.h
@@ -6,7 +6,7 @@

 #include <array>
 #include <bitset>
-#include <xbyak.h>
+#include <xbyak/xbyak.h>
 #include "common/bit_field.h"
 #include "common/common_types.h"
 #include "common/x64/xbyak_abi.h"
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -463,6 +463,7 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
        ++page_index;
        page_offset = 0;
        remaining_size -= num_bytes;
+        old_page_addr = page_addr;
    }
    split();
    return result;
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -156,6 +156,10 @@ public:
        return shader_backend;
    }

+    bool IsAmd() const {
+        return vendor_name == "ATI Technologies Inc.";
+    }
+
 private:
    static bool TestVariableAoffi();
    static bool TestPreciseBug();
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -15,7 +15,7 @@
 #include "video_core/renderer_opengl/gl_shader_util.h"
 #include "video_core/renderer_opengl/gl_state_tracker.h"
 #include "video_core/shader_notify.h"
-#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/texture_cache_base.h"

 #if defined(_MSC_VER) && defined(NDEBUG)
 #define LAMBDA_FORCEINLINE [[msvc::forceinline]]
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -32,7 +32,7 @@
 #include "video_core/renderer_opengl/maxwell_to_gl.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
 #include "video_core/shader_cache.h"
-#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/texture_cache_base.h"

 namespace OpenGL {

--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -219,6 +219,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
      host_info{
          .support_float16 = false,
          .support_int64 = device.HasShaderInt64(),
+          .needs_demote_reorder = device.IsAmd(),
      } {
    if (use_asynchronous_shaders) {
        workers = CreateWorkers();
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -18,10 +18,8 @@
 #include "video_core/renderer_opengl/maxwell_to_gl.h"
 #include "video_core/renderer_opengl/util_shaders.h"
 #include "video_core/surface.h"
-#include "video_core/texture_cache/format_lookup_table.h"
+#include "video_core/texture_cache/formatter.h"
 #include "video_core/texture_cache/samples_helper.h"
-#include "video_core/texture_cache/texture_cache.h"
-#include "video_core/textures/decoders.h"

 namespace OpenGL {
 namespace {
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -12,7 +12,7 @@
 #include "shader_recompiler/shader_info.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/util_shaders.h"
-#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/texture_cache_base.h"

 namespace OpenGL {

--- a/src/video_core/renderer_opengl/gl_texture_cache_base.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache_base.cpp
@@ -0,0 +1,10 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/renderer_opengl/gl_texture_cache.h"
+#include "video_core/texture_cache/texture_cache.h"
+
+namespace VideoCommon {
+template class VideoCommon::TextureCache<OpenGL::TextureCacheParams>;
+}
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -164,7 +164,8 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
        blit_screen.Recreate();
    }
    const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated);
-    scheduler.Flush(render_semaphore);
+    const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore();
+    scheduler.Flush(render_semaphore, present_semaphore);
    scheduler.WaitWorker();
    swapchain.Present(render_semaphore);

--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -159,11 +159,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,

        const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
        const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
-        const size_t size_bytes = GetSizeInBytes(framebuffer);

        // TODO(Rodrigo): Read this from HLE
        constexpr u32 block_height_log2 = 4;
        const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
+        const u64 size_bytes{Tegra::Texture::CalculateSize(true, bytes_per_pixel,
+                                                           framebuffer.stride, framebuffer.height,
+                                                           1, block_height_log2, 0)};
        Tegra::Texture::UnswizzleTexture(
            mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes),
            bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
@@ -356,7 +358,7 @@ void VKBlitScreen::CreateDescriptorPool() {
 void VKBlitScreen::CreateRenderPass() {
    const VkAttachmentDescription color_attachment{
        .flags = 0,
-        .format = swapchain.GetImageFormat(),
+        .format = swapchain.GetImageViewFormat(),
        .samples = VK_SAMPLE_COUNT_1_BIT,
        .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
        .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -281,7 +281,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw
        .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U,
        .unified_descriptor_binding = true,
        .support_descriptor_aliasing = true,
-        .support_int8 = true,
+        .support_int8 = device.IsInt8Supported(),
        .support_int16 = device.IsShaderInt16Supported(),
        .support_int64 = device.IsShaderInt64Supported(),
        .support_vertex_instance_id = false,
@@ -325,6 +325,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw
    host_info = Shader::HostTranslateInfo{
        .support_float16 = device.IsFloat16Supported(),
        .support_int64 = device.IsShaderInt64Supported(),
+        .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR ||
+                                driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR,
    };
 }

--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -32,7 +32,7 @@
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/shader_cache.h"
-#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/texture_cache_base.h"
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"

@@ -228,9 +228,7 @@ void RasterizerVulkan::Clear() {
    };

    const u32 color_attachment = regs.clear_buffers.RT;
-    const auto attachment_aspect_mask = framebuffer->ImageRanges()[color_attachment].aspectMask;
-    const bool is_color_rt = (attachment_aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
-    if (use_color && is_color_rt) {
+    if (use_color && framebuffer->HasAspectColorBit(color_attachment)) {
        VkClearValue clear_value;
        std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color));

@@ -248,12 +246,15 @@ void RasterizerVulkan::Clear() {
        return;
    }
    VkImageAspectFlags aspect_flags = 0;
-    if (use_depth) {
+    if (use_depth && framebuffer->HasAspectDepthBit()) {
        aspect_flags |= VK_IMAGE_ASPECT_DEPTH_BIT;
    }
-    if (use_stencil) {
+    if (use_stencil && framebuffer->HasAspectStencilBit()) {
        aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT;
    }
+    if (aspect_flags == 0) {
+        return;
+    }
    scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
                      clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
        VkClearAttachment attachment;
@@ -764,12 +765,7 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
    const Maxwell::StencilOp zpass = regs.stencil_front_op_zpass;
    const Maxwell::ComparisonOp compare = regs.stencil_front_func_func;
    if (regs.stencil_two_side_enable) {
-        scheduler.Record([fail, zfail, zpass, compare](vk::CommandBuffer cmdbuf) {
-            cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, MaxwellToVK::StencilOp(fail),
-                                   MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail),
-                                   MaxwellToVK::ComparisonOp(compare));
-        });
-    } else {
+        // Separate stencil op per face
        const Maxwell::StencilOp back_fail = regs.stencil_back_op_fail;
        const Maxwell::StencilOp back_zfail = regs.stencil_back_op_zfail;
        const Maxwell::StencilOp back_zpass = regs.stencil_back_op_zpass;
@@ -784,6 +780,13 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
                                   MaxwellToVK::StencilOp(back_zfail),
                                   MaxwellToVK::ComparisonOp(back_compare));
        });
+    } else {
+        // Front face defines the stencil op of both faces
+        scheduler.Record([fail, zfail, zpass, compare](vk::CommandBuffer cmdbuf) {
+            cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, MaxwellToVK::StencilOp(fail),
+                                   MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail),
+                                   MaxwellToVK::ComparisonOp(compare));
+        });
    }
 }

--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -55,14 +55,14 @@ VKScheduler::~VKScheduler() {
    worker_thread.join();
 }

-void VKScheduler::Flush(VkSemaphore semaphore) {
-    SubmitExecution(semaphore);
+void VKScheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
+    SubmitExecution(signal_semaphore, wait_semaphore);
    AllocateNewContext();
 }

-void VKScheduler::Finish(VkSemaphore semaphore) {
+void VKScheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
    const u64 presubmit_tick = CurrentTick();
-    SubmitExecution(semaphore);
+    SubmitExecution(signal_semaphore, wait_semaphore);
    WaitWorker();
    Wait(presubmit_tick);
    AllocateNewContext();
@@ -171,37 +171,41 @@ void VKScheduler::AllocateWorkerCommandBuffer() {
    });
 }

-void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
+void VKScheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
    EndPendingOperations();
    InvalidateState();

    const u64 signal_value = master_semaphore->NextTick();
-    Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
+    Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
        cmdbuf.End();
-
-        const u32 num_signal_semaphores = semaphore ? 2U : 1U;
-
-        const u64 wait_value = signal_value - 1;
-        const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
-
        const VkSemaphore timeline_semaphore = master_semaphore->Handle();
+
+        const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U;
        const std::array signal_values{signal_value, u64(0)};
-        const std::array signal_semaphores{timeline_semaphore, semaphore};
+        const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
+
+        const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U;
+        const std::array wait_values{signal_value - 1, u64(1)};
+        const std::array wait_semaphores{timeline_semaphore, wait_semaphore};
+        static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
+            VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+        };

        const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
            .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
            .pNext = nullptr,
-            .waitSemaphoreValueCount = 1,
-            .pWaitSemaphoreValues = &wait_value,
+            .waitSemaphoreValueCount = num_wait_semaphores,
+            .pWaitSemaphoreValues = wait_values.data(),
            .signalSemaphoreValueCount = num_signal_semaphores,
            .pSignalSemaphoreValues = signal_values.data(),
        };
        const VkSubmitInfo submit_info{
            .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
            .pNext = &timeline_si,
-            .waitSemaphoreCount = 1,
-            .pWaitSemaphores = &timeline_semaphore,
-            .pWaitDstStageMask = &wait_stage_mask,
+            .waitSemaphoreCount = num_wait_semaphores,
+            .pWaitSemaphores = wait_semaphores.data(),
+            .pWaitDstStageMask = wait_stage_masks.data(),
            .commandBufferCount = 1,
            .pCommandBuffers = cmdbuf.address(),
            .signalSemaphoreCount = num_signal_semaphores,
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -34,10 +34,10 @@ public:
    ~VKScheduler();

    /// Sends the current execution context to the GPU.
-    void Flush(VkSemaphore semaphore = nullptr);
+    void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);

    /// Sends the current execution context to the GPU and waits for it to complete.
-    void Finish(VkSemaphore semaphore = nullptr);
+    void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);

    /// Waits for the worker thread to finish executing everything. After this function returns it's
    /// safe to touch worker resources.
@@ -191,7 +191,7 @@ private:

    void AllocateWorkerCommandBuffer();

-    void SubmitExecution(VkSemaphore semaphore);
+    void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore);

    void AllocateNewContext();

--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -20,16 +20,15 @@ namespace Vulkan {

 namespace {

-VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats, bool srgb) {
+VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats) {
    if (formats.size() == 1 && formats[0].format == VK_FORMAT_UNDEFINED) {
        VkSurfaceFormatKHR format;
        format.format = VK_FORMAT_B8G8R8A8_UNORM;
        format.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
        return format;
    }
-    const auto& found = std::find_if(formats.begin(), formats.end(), [srgb](const auto& format) {
-        const auto request_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
-        return format.format == request_format &&
+    const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) {
+        return format.format == VK_FORMAT_B8G8R8A8_UNORM &&
               format.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
    });
    return found != formats.end() ? *found : formats[0];
@@ -107,14 +106,12 @@ void VKSwapchain::AcquireNextImage() {
 }

 void VKSwapchain::Present(VkSemaphore render_semaphore) {
-    const VkSemaphore present_semaphore{*present_semaphores[frame_index]};
-    const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore};
    const auto present_queue{device.GetPresentQueue()};
    const VkPresentInfoKHR present_info{
        .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
        .pNext = nullptr,
-        .waitSemaphoreCount = render_semaphore ? 2U : 1U,
-        .pWaitSemaphores = semaphores.data(),
+        .waitSemaphoreCount = render_semaphore ? 1U : 0U,
+        .pWaitSemaphores = &render_semaphore,
        .swapchainCount = 1,
        .pSwapchains = swapchain.address(),
        .pImageIndices = &image_index,
@@ -145,7 +142,7 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
    const auto formats{physical_device.GetSurfaceFormatsKHR(surface)};
    const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)};

-    const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)};
+    const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
    const VkPresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};

    u32 requested_image_count{capabilities.minImageCount + 1};
@@ -180,6 +177,17 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
        swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
        swapchain_ci.pQueueFamilyIndices = queue_indices.data();
    }
+    static constexpr std::array view_formats{VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB};
+    VkImageFormatListCreateInfo format_list{
+        .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR,
+        .pNext = nullptr,
+        .viewFormatCount = static_cast<u32>(view_formats.size()),
+        .pViewFormats = view_formats.data(),
+    };
+    if (device.IsKhrSwapchainMutableFormatEnabled()) {
+        format_list.pNext = std::exchange(swapchain_ci.pNext, &format_list);
+        swapchain_ci.flags |= VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR;
+    }
    // Request the size again to reduce the possibility of a TOCTOU race condition.
    const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface);
    swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height);
@@ -191,7 +199,7 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,

    images = swapchain.GetImages();
    image_count = static_cast<u32>(images.size());
-    image_format = surface_format.format;
+    image_view_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
 }

 void VKSwapchain::CreateSemaphores() {
@@ -207,7 +215,7 @@ void VKSwapchain::CreateImageViews() {
        .flags = 0,
        .image = {},
        .viewType = VK_IMAGE_VIEW_TYPE_2D,
-        .format = image_format,
+        .format = image_view_format,
        .components =
            {
                .r = VK_COMPONENT_SWIZZLE_IDENTITY,
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -68,8 +68,12 @@ public:
        return *image_views[index];
    }

-    VkFormat GetImageFormat() const {
-        return image_format;
+    VkFormat GetImageViewFormat() const {
+        return image_view_format;
+    }
+
+    VkSemaphore CurrentPresentSemaphore() const {
+        return *present_semaphores[frame_index];
    }

 private:
@@ -96,7 +100,7 @@ private:
    u32 image_index{};
    u32 frame_index{};

-    VkFormat image_format{};
+    VkFormat image_view_format{};
    VkExtent2D extent{};

    bool current_srgb{};
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -19,6 +19,8 @@
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/texture_cache/formatter.h"
+#include "video_core/texture_cache/samples_helper.h"
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -1184,9 +1186,12 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
        renderpass_key.depth_format = depth_buffer->format;
        num_layers = std::max(num_layers, depth_buffer->range.extent.layers);
        images[num_images] = depth_buffer->ImageHandle();
-        image_ranges[num_images] = MakeSubresourceRange(depth_buffer);
+        const VkImageSubresourceRange subresource_range = MakeSubresourceRange(depth_buffer);
+        image_ranges[num_images] = subresource_range;
        samples = depth_buffer->Samples();
        ++num_images;
+        has_depth = (subresource_range.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0;
+        has_stencil = (subresource_range.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0;
    } else {
        renderpass_key.depth_format = PixelFormat::Invalid;
    }
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -9,7 +9,7 @@

 #include "shader_recompiler/shader_info.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
-#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/texture_cache_base.h"
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"

@@ -232,6 +232,18 @@ public:
        return image_ranges;
    }

+    [[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept {
+        return (image_ranges.at(index).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
+    }
+
+    [[nodiscard]] bool HasAspectDepthBit() const noexcept {
+        return has_depth;
+    }
+
+    [[nodiscard]] bool HasAspectStencilBit() const noexcept {
+        return has_stencil;
+    }
+
 private:
    vk::Framebuffer framebuffer;
    VkRenderPass renderpass{};
@@ -241,6 +253,8 @@ private:
    u32 num_images = 0;
    std::array<VkImage, 9> images{};
    std::array<VkImageSubresourceRange, 9> image_ranges{};
+    bool has_depth{};
+    bool has_stencil{};
 };

 struct TextureCacheParams {
--- a/src/video_core/renderer_vulkan/vk_texture_cache_base.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache_base.cpp
@@ -0,0 +1,10 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/texture_cache/texture_cache.h"
+
+namespace VideoCommon {
+template class VideoCommon::TextureCache<Vulkan::TextureCacheParams>;
+}
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -80,7 +80,7 @@ struct ImageBase {
    VAddr cpu_addr_end = 0;

    u64 modification_tick = 0;
-    u64 frame_tick = 0;
+    size_t lru_index = SIZE_MAX;

    std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{};

--- a/src/video_core/texture_cache/image_view_info.cpp
+++ b/src/video_core/texture_cache/image_view_info.cpp
@@ -6,7 +6,7 @@

 #include "common/assert.h"
 #include "video_core/texture_cache/image_view_info.h"
-#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/texture_cache_base.h"
 #include "video_core/texture_cache/types.h"
 #include "video_core/textures/texture.h"

@@ -14,6 +14,8 @@ namespace VideoCommon {

 namespace {

+using Tegra::Texture::TextureType;
+
 constexpr u8 RENDER_TARGET_SWIZZLE = std::numeric_limits<u8>::max();

 [[nodiscard]] u8 CastSwizzle(SwizzleSource source) {
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -4,48 +4,10 @@

 #pragma once

-#include <algorithm>
-#include <array>
-#include <bit>
-#include <memory>
-#include <mutex>
-#include <optional>
-#include <span>
-#include <type_traits>
-#include <unordered_map>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include <boost/container/small_vector.hpp>
-
 #include "common/alignment.h"
-#include "common/common_types.h"
-#include "common/literals.h"
-#include "common/logging/log.h"
-#include "common/settings.h"
-#include "video_core/compatible_formats.h"
-#include "video_core/delayed_destruction_ring.h"
 #include "video_core/dirty_flags.h"
-#include "video_core/engines/fermi_2d.h"
-#include "video_core/engines/kepler_compute.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/memory_manager.h"
-#include "video_core/rasterizer_interface.h"
-#include "video_core/surface.h"
-#include "video_core/texture_cache/descriptor_table.h"
-#include "video_core/texture_cache/format_lookup_table.h"
-#include "video_core/texture_cache/formatter.h"
-#include "video_core/texture_cache/image_base.h"
-#include "video_core/texture_cache/image_info.h"
-#include "video_core/texture_cache/image_view_base.h"
-#include "video_core/texture_cache/image_view_info.h"
-#include "video_core/texture_cache/render_targets.h"
 #include "video_core/texture_cache/samples_helper.h"
-#include "video_core/texture_cache/slot_vector.h"
-#include "video_core/texture_cache/types.h"
-#include "video_core/texture_cache/util.h"
-#include "video_core/textures/texture.h"
+#include "video_core/texture_cache/texture_cache_base.h"

 namespace VideoCommon {

@@ -61,352 +23,6 @@ using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
 using VideoCore::Surface::SurfaceType;
 using namespace Common::Literals;

-template <class P>
-class TextureCache {
-    /// Address shift for caching images into a hash table
-    static constexpr u64 PAGE_BITS = 20;
-
-    /// Enables debugging features to the texture cache
-    static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
-    /// Implement blits as copies between framebuffers
-    static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
-    /// True when some copies have to be emulated
-    static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
-    /// True when the API can provide info about the memory of the device.
-    static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
-
-    /// Image view ID for null descriptors
-    static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
-    /// Sampler ID for bugged sampler ids
-    static constexpr SamplerId NULL_SAMPLER_ID{0};
-
-    static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
-    static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;
-
-    using Runtime = typename P::Runtime;
-    using Image = typename P::Image;
-    using ImageAlloc = typename P::ImageAlloc;
-    using ImageView = typename P::ImageView;
-    using Sampler = typename P::Sampler;
-    using Framebuffer = typename P::Framebuffer;
-
-    struct BlitImages {
-        ImageId dst_id;
-        ImageId src_id;
-        PixelFormat dst_format;
-        PixelFormat src_format;
-    };
-
-    template <typename T>
-    struct IdentityHash {
-        [[nodiscard]] size_t operator()(T value) const noexcept {
-            return static_cast<size_t>(value);
-        }
-    };
-
-public:
-    explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
-                          Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
-
-    /// Notify the cache that a new frame has been queued
-    void TickFrame();
-
-    /// Return a constant reference to the given image view id
-    [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
-
-    /// Return a reference to the given image view id
-    [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
-
-    /// Mark an image as modified from the GPU
-    void MarkModification(ImageId id) noexcept;
-
-    /// Fill image_view_ids with the graphics images in indices
-    void FillGraphicsImageViews(std::span<const u32> indices,
-                                std::span<ImageViewId> image_view_ids);
-
-    /// Fill image_view_ids with the compute images in indices
-    void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids);
-
-    /// Get the sampler from the graphics descriptor table in the specified index
-    Sampler* GetGraphicsSampler(u32 index);
-
-    /// Get the sampler from the compute descriptor table in the specified index
-    Sampler* GetComputeSampler(u32 index);
-
-    /// Refresh the state for graphics image view and sampler descriptors
-    void SynchronizeGraphicsDescriptors();
-
-    /// Refresh the state for compute image view and sampler descriptors
-    void SynchronizeComputeDescriptors();
-
-    /// Update bound render targets and upload memory if necessary
-    /// @param is_clear True when the render targets are being used for clears
-    void UpdateRenderTargets(bool is_clear);
-
-    /// Find a framebuffer with the currently bound render targets
-    /// UpdateRenderTargets should be called before this
-    Framebuffer* GetFramebuffer();
-
-    /// Mark images in a range as modified from the CPU
-    void WriteMemory(VAddr cpu_addr, size_t size);
-
-    /// Download contents of host images to guest memory in a region
-    void DownloadMemory(VAddr cpu_addr, size_t size);
-
-    /// Remove images in a region
-    void UnmapMemory(VAddr cpu_addr, size_t size);
-
-    /// Remove images in a region
-    void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
-
-    /// Blit an image with the given parameters
-    void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
-                   const Tegra::Engines::Fermi2D::Surface& src,
-                   const Tegra::Engines::Fermi2D::Config& copy);
-
-    /// Invalidate the contents of the color buffer index
-    /// These contents become unspecified, the cache can assume aggressive optimizations.
-    void InvalidateColorBuffer(size_t index);
-
-    /// Invalidate the contents of the depth buffer
-    /// These contents become unspecified, the cache can assume aggressive optimizations.
-    void InvalidateDepthBuffer();
-
-    /// Try to find a cached image view in the given CPU address
-    [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr);
-
-    /// Return true when there are uncommitted images to be downloaded
-    [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
-
-    /// Return true when the caller should wait for async downloads
-    [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
-
-    /// Commit asynchronous downloads
-    void CommitAsyncFlushes();
-
-    /// Pop asynchronous downloads
-    void PopAsyncFlushes();
-
-    /// Return true when a CPU region is modified from the GPU
-    [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
-
-    std::mutex mutex;
-
-private:
-    /// Iterate over all page indices in a range
-    template <typename Func>
-    static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
-        static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
-        const u64 page_end = (addr + size - 1) >> PAGE_BITS;
-        for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
-            if constexpr (RETURNS_BOOL) {
-                if (func(page)) {
-                    break;
-                }
-            } else {
-                func(page);
-            }
-        }
-    }
-
-    template <typename Func>
-    static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
-        static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
-        const u64 page_end = (addr + size - 1) >> PAGE_BITS;
-        for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
-            if constexpr (RETURNS_BOOL) {
-                if (func(page)) {
-                    break;
-                }
-            } else {
-                func(page);
-            }
-        }
-    }
-
-    /// Runs the Garbage Collector.
-    void RunGarbageCollector();
-
-    /// Fills image_view_ids in the image views in indices
-    void FillImageViews(DescriptorTable<TICEntry>& table,
-                        std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
-                        std::span<ImageViewId> image_view_ids);
-
-    /// Find or create an image view in the guest descriptor table
-    ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
-                               std::span<ImageViewId> cached_image_view_ids, u32 index);
-
-    /// Find or create a framebuffer with the given render target parameters
-    FramebufferId GetFramebufferId(const RenderTargets& key);
-
-    /// Refresh the contents (pixel data) of an image
-    void RefreshContents(Image& image, ImageId image_id);
-
-    /// Upload data from guest to an image
-    template <typename StagingBuffer>
-    void UploadImageContents(Image& image, StagingBuffer& staging_buffer);
-
-    /// Find or create an image view from a guest descriptor
-    [[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
-
-    /// Create a new image view from a guest descriptor
-    [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config);
-
-    /// Find or create an image from the given parameters
-    [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
-                                            RelaxedOptions options = RelaxedOptions{});
-
-    /// Find an image from the given parameters
-    [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
-                                    RelaxedOptions options);
-
-    /// Create an image from the given parameters
-    [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
-                                      RelaxedOptions options);
-
-    /// Create a new image and join perfectly matching existing images
-    /// Remove joined images from the cache
-    [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
-
-    /// Return a blit image pair from the given guest blit parameters
-    [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
-                                           const Tegra::Engines::Fermi2D::Surface& src);
-
-    /// Find or create a sampler from a guest descriptor sampler
-    [[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
-
-    /// Find or create an image view for the given color buffer index
-    [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear);
-
-    /// Find or create an image view for the depth buffer
-    [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear);
-
-    /// Find or create a view for a render target with the given image parameters
-    [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
-                                                   bool is_clear);
-
-    /// Iterates over all the images in a region calling func
-    template <typename Func>
-    void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
-
-    template <typename Func>
-    void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
-
-    template <typename Func>
-    void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
-
-    /// Iterates over all the images in a region calling func
-    template <typename Func>
-    void ForEachSparseSegment(ImageBase& image, Func&& func);
-
-    /// Find or create an image view in the given image with the passed parameters
-    [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
-
-    /// Register image in the page table
-    void RegisterImage(ImageId image);
-
-    /// Unregister image from the page table
-    void UnregisterImage(ImageId image);
-
-    /// Track CPU reads and writes for image
-    void TrackImage(ImageBase& image, ImageId image_id);
-
-    /// Stop tracking CPU reads and writes for image
-    void UntrackImage(ImageBase& image, ImageId image_id);
-
-    /// Delete image from the cache
-    void DeleteImage(ImageId image);
-
-    /// Remove image views references from the cache
-    void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);
-
-    /// Remove framebuffers using the given image views from the cache
-    void RemoveFramebuffers(std::span<const ImageViewId> removed_views);
-
-    /// Mark an image as modified from the GPU
-    void MarkModification(ImageBase& image) noexcept;
-
-    /// Synchronize image aliases, copying data if needed
-    void SynchronizeAliases(ImageId image_id);
-
-    /// Prepare an image to be used
-    void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
-
-    /// Prepare an image view to be used
-    void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
-
-    /// Execute copies from one image to the other, even if they are incompatible
-    void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies);
-
-    /// Bind an image view as render target, downloading resources preemtively if needed
-    void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
-
-    /// Create a render target from a given image and image view parameters
-    [[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage(
-        ImageId, const ImageViewInfo& view_info);
-
-    /// Returns true if the current clear parameters clear the whole image of a given image view
-    [[nodiscard]] bool IsFullClear(ImageViewId id);
-
-    Runtime& runtime;
-    VideoCore::RasterizerInterface& rasterizer;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
-    Tegra::MemoryManager& gpu_memory;
-
-    DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
-    DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
-    std::vector<SamplerId> graphics_sampler_ids;
-    std::vector<ImageViewId> graphics_image_view_ids;
-
-    DescriptorTable<TICEntry> compute_image_table{gpu_memory};
-    DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
-    std::vector<SamplerId> compute_sampler_ids;
-    std::vector<ImageViewId> compute_image_view_ids;
-
-    RenderTargets render_targets;
-
-    std::unordered_map<TICEntry, ImageViewId> image_views;
-    std::unordered_map<TSCEntry, SamplerId> samplers;
-    std::unordered_map<RenderTargets, FramebufferId> framebuffers;
-
-    std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
-    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
-    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
-
-    std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
-
-    VAddr virtual_invalid_space{};
-
-    bool has_deleted_images = false;
-    u64 total_used_memory = 0;
-    u64 minimum_memory;
-    u64 expected_memory;
-    u64 critical_memory;
-
-    SlotVector<Image> slot_images;
-    SlotVector<ImageMapView> slot_map_views;
-    SlotVector<ImageView> slot_image_views;
-    SlotVector<ImageAlloc> slot_image_allocs;
-    SlotVector<Sampler> slot_samplers;
-    SlotVector<Framebuffer> slot_framebuffers;
-
-    // TODO: This data structure is not optimal and it should be reworked
-    std::vector<ImageId> uncommitted_downloads;
-    std::queue<std::vector<ImageId>> committed_downloads;
-
-    static constexpr size_t TICKS_TO_DESTROY = 6;
-    DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
-    DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
-    DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers;
-
-    std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table;
-
-    u64 modification_tick = 0;
-    u64 frame_tick = 0;
-    typename SlotVector<Image>::Iterator deletion_iterator;
-};
-
 template <class P>
 TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
                              Tegra::Engines::Maxwell3D& maxwell3d_,
@@ -426,8 +42,6 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
    void(slot_image_views.insert(runtime, NullImageParams{}));
    void(slot_samplers.insert(runtime, sampler_descriptor));

-    deletion_iterator = slot_images.begin();
-
    if constexpr (HAS_DEVICE_MEMORY_INFO) {
        const auto device_memory = runtime.GetDeviceLocalMemory();
        const u64 possible_expected_memory = (device_memory * 3) / 10;
@@ -447,70 +61,38 @@ template <class P>
 void TextureCache<P>::RunGarbageCollector() {
    const bool high_priority_mode = total_used_memory >= expected_memory;
    const bool aggressive_mode = total_used_memory >= critical_memory;
-    const u64 ticks_to_destroy = high_priority_mode ? 60 : 100;
-    int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64);
-    for (; num_iterations > 0; --num_iterations) {
-        if (deletion_iterator == slot_images.end()) {
-            deletion_iterator = slot_images.begin();
-            if (deletion_iterator == slot_images.end()) {
-                break;
-            }
+    // The more memory pressure there is, the younger an image may be when it gets collected and
+    // the more images may be visited in a single run.
+    const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL;
+    // frame_tick is unsigned: while fewer than ticks_to_destroy frames have elapsed, the
+    // subtraction passed to ForEachItemBelow would wrap around. No image can be that old yet,
+    // so there is nothing to collect.
+    if (frame_tick < ticks_to_destroy) {
+        return;
+    }
+    size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 100 : 5);
+    // Evicts a single image. Returns true once the iteration budget is exhausted
+    // (presumably this makes ForEachItemBelow stop; confirm in common/lru_cache.h).
+    const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) {
+        if (num_iterations == 0) {
+            return true;
        }
-        auto [image_id, image_tmp] = *deletion_iterator;
-        Image* image = image_tmp; // fix clang error.
-        const bool is_alias = True(image->flags & ImageFlagBits::Alias);
-        const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap);
-        const bool must_download = image->IsSafeDownload();
-        bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download);
-        const u64 ticks_needed =
-            is_bad_overlap
-                ? ticks_to_destroy >> 4
-                : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy);
-        should_care |= aggressive_mode;
-        if (should_care && image->frame_tick + ticks_needed < frame_tick) {
-            if (is_bad_overlap) {
-                const bool overlap_check = std::ranges::all_of(
-                    image->overlapping_images, [&, image](const ImageId& overlap_id) {
-                        auto& overlap = slot_images[overlap_id];
-                        return overlap.frame_tick >= image->frame_tick;
-                    });
-                if (!overlap_check) {
-                    ++deletion_iterator;
-                    continue;
-                }
-            }
-            if (!is_bad_overlap && must_download) {
-                const bool alias_check = std::ranges::none_of(
-                    image->aliased_images, [&, image](const AliasedImage& alias) {
-                        auto& alias_image = slot_images[alias.id];
-                        return (alias_image.frame_tick < image->frame_tick) ||
-                               (alias_image.modification_tick < image->modification_tick);
-                    });
-
-                if (alias_check) {
-                    auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes);
-                    const auto copies = FullDownloadCopies(image->info);
-                    image->DownloadMemory(map, copies);
-                    runtime.Finish();
-                    SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span);
-                }
-            }
-            if (True(image->flags & ImageFlagBits::Tracked)) {
-                UntrackImage(*image, image_id);
-            }
-            UnregisterImage(image_id);
-            DeleteImage(image_id);
-            if (is_bad_overlap) {
-                ++num_iterations;
-            }
+        --num_iterations;
+        auto& image = slot_images[image_id];
+        const bool must_download = image.IsSafeDownload();
+        if (!high_priority_mode && must_download) {
+            return false;
        }
-        ++deletion_iterator;
-    }
+        if (must_download) {
+            auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
+            const auto copies = FullDownloadCopies(image.info);
+            image.DownloadMemory(map, copies);
+            runtime.Finish();
+            SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
+        }
+        if (True(image.flags & ImageFlagBits::Tracked)) {
+            UntrackImage(image, image_id);
+        }
+        UnregisterImage(image_id);
+        DeleteImage(image_id);
+        return false;
+    };
+    lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
 }

 template <class P>
 void TextureCache<P>::TickFrame() {
-    if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) {
+    if (total_used_memory > minimum_memory) {
        RunGarbageCollector();
    }
    sentenced_images.Tick();
@@ -820,40 +402,6 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
    }
 }

-template <class P>
-void TextureCache<P>::InvalidateColorBuffer(size_t index) {
-    ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
-    color_buffer_id = FindColorBuffer(index, false);
-    if (!color_buffer_id) {
-        LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index);
-        return;
-    }
-    // When invalidating a color buffer, the old contents are no longer relevant
-    ImageView& color_buffer = slot_image_views[color_buffer_id];
-    Image& image = slot_images[color_buffer.image_id];
-    image.flags &= ~ImageFlagBits::CpuModified;
-    image.flags &= ~ImageFlagBits::GpuModified;
-
-    runtime.InvalidateColorBuffer(color_buffer, index);
-}
-
-template <class P>
-void TextureCache<P>::InvalidateDepthBuffer() {
-    ImageViewId& depth_buffer_id = render_targets.depth_buffer_id;
-    depth_buffer_id = FindDepthBuffer(false);
-    if (!depth_buffer_id) {
-        LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer");
-        return;
-    }
-    // When invalidating the depth buffer, the old contents are no longer relevant
-    ImageBase& image = slot_images[slot_image_views[depth_buffer_id].image_id];
-    image.flags &= ~ImageFlagBits::CpuModified;
-    image.flags &= ~ImageFlagBits::GpuModified;
-
-    ImageView& depth_buffer = slot_image_views[depth_buffer_id];
-    runtime.InvalidateDepthBuffer(depth_buffer);
-}
-
 template <class P>
 typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) {
    // TODO: Properly implement this
@@ -1495,6 +1043,8 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
        tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
    }
    total_used_memory += Common::AlignUp(tentative_size, 1024);
+    image.lru_index = lru_cache.Insert(image_id, frame_tick);
+
    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
                   [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
    if (False(image.flags & ImageFlagBits::Sparse)) {
@@ -1532,6 +1082,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
        tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
    }
    total_used_memory -= Common::AlignUp(tentative_size, 1024);
+    lru_cache.Free(image.lru_index);
    const auto& clear_page_table =
        [this, image_id](
            u64 page,
@@ -1801,7 +1352,7 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool
    if (is_modification) {
        MarkModification(image);
    }
-    image.frame_tick = frame_tick;
+    lru_cache.Touch(image.lru_index, frame_tick);
 }

 template <class P>
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -0,0 +1,391 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <mutex>
+#include <queue>
+#include <span>
+#include <type_traits>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "common/common_types.h"
+#include "common/literals.h"
+#include "common/lru_cache.h"
+#include "video_core/compatible_formats.h"
+#include "video_core/delayed_destruction_ring.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/descriptor_table.h"
+#include "video_core/texture_cache/image_base.h"
+#include "video_core/texture_cache/image_info.h"
+#include "video_core/texture_cache/image_view_info.h"
+#include "video_core/texture_cache/render_targets.h"
+#include "video_core/texture_cache/slot_vector.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/texture_cache/util.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+using Tegra::Texture::SwizzleSource;
+using Tegra::Texture::TICEntry;
+using Tegra::Texture::TSCEntry;
+using VideoCore::Surface::GetFormatType;
+using VideoCore::Surface::IsCopyCompatible;
+using VideoCore::Surface::PixelFormat;
+using VideoCore::Surface::PixelFormatFromDepthFormat;
+using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
+using namespace Common::Literals;
+
+template <class P>
+class TextureCache {
+    /// Address shift for caching images into a hash table
+    static constexpr u64 PAGE_BITS = 20;
+
+    /// Enables debugging features to the texture cache
+    static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
+    /// Implement blits as copies between framebuffers
+    static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
+    /// True when some copies have to be emulated
+    static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
+    /// True when the API can provide info about the memory of the device.
+    static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
+
+    /// Image view ID for null descriptors
+    static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
+    /// Sampler ID for bugged sampler ids
+    static constexpr SamplerId NULL_SAMPLER_ID{0};
+
+    static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
+    static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;
+
+    using Runtime = typename P::Runtime;
+    using Image = typename P::Image;
+    using ImageAlloc = typename P::ImageAlloc;
+    using ImageView = typename P::ImageView;
+    using Sampler = typename P::Sampler;
+    using Framebuffer = typename P::Framebuffer;
+
+    struct BlitImages {
+        ImageId dst_id;
+        ImageId src_id;
+        PixelFormat dst_format;
+        PixelFormat src_format;
+    };
+
+    template <typename T>
+    struct IdentityHash {
+        [[nodiscard]] size_t operator()(T value) const noexcept {
+            return static_cast<size_t>(value);
+        }
+    };
+
+public:
+    explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
+                          Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
+
+    /// Notify the cache that a new frame has been queued
+    void TickFrame();
+
+    /// Return a constant reference to the given image view id
+    [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
+
+    /// Return a reference to the given image view id
+    [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
+
+    /// Mark an image as modified from the GPU
+    void MarkModification(ImageId id) noexcept;
+
+    /// Fill image_view_ids with the graphics images in indices
+    void FillGraphicsImageViews(std::span<const u32> indices,
+                                std::span<ImageViewId> image_view_ids);
+
+    /// Fill image_view_ids with the compute images in indices
+    void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids);
+
+    /// Get the sampler from the graphics descriptor table in the specified index
+    Sampler* GetGraphicsSampler(u32 index);
+
+    /// Get the sampler from the compute descriptor table in the specified index
+    Sampler* GetComputeSampler(u32 index);
+
+    /// Refresh the state for graphics image view and sampler descriptors
+    void SynchronizeGraphicsDescriptors();
+
+    /// Refresh the state for compute image view and sampler descriptors
+    void SynchronizeComputeDescriptors();
+
+    /// Update bound render targets and upload memory if necessary
+    /// @param is_clear True when the render targets are being used for clears
+    void UpdateRenderTargets(bool is_clear);
+
+    /// Find a framebuffer with the currently bound render targets
+    /// UpdateRenderTargets should be called before this
+    Framebuffer* GetFramebuffer();
+
+    /// Mark images in a range as modified from the CPU
+    void WriteMemory(VAddr cpu_addr, size_t size);
+
+    /// Download contents of host images to guest memory in a region
+    void DownloadMemory(VAddr cpu_addr, size_t size);
+
+    /// Remove images in a region
+    void UnmapMemory(VAddr cpu_addr, size_t size);
+
+    /// Remove images in a region
+    void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
+
+    /// Blit an image with the given parameters
+    void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
+                   const Tegra::Engines::Fermi2D::Surface& src,
+                   const Tegra::Engines::Fermi2D::Config& copy);
+
+    /// Try to find a cached image view in the given CPU address
+    [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr);
+
+    /// Return true when there are uncommitted images to be downloaded
+    [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
+
+    /// Return true when the caller should wait for async downloads
+    [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
+
+    /// Commit asynchronous downloads
+    void CommitAsyncFlushes();
+
+    /// Pop asynchronous downloads
+    void PopAsyncFlushes();
+
+    /// Return true when a CPU region is modified from the GPU
+    [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
+
+    std::mutex mutex;
+
+private:
+    /// Invoke func with every page index (address >> PAGE_BITS) touched by the CPU address range
+    /// [addr, addr + size). When func returns bool, returning true stops the iteration early.
+    template <typename Func>
+    static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
+        // invoke_result_t names the callable's return type. Comparing the invoke_result trait
+        // struct itself against bool is always false, which left the early break unreachable.
+        static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
+        // Page holding the last byte of the range (inclusive bound)
+        const u64 page_end = (addr + size - 1) >> PAGE_BITS;
+        for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
+            if constexpr (RETURNS_BOOL) {
+                if (func(page)) {
+                    break;
+                }
+            } else {
+                func(page);
+            }
+        }
+    }
+
+    /// Invoke func with every page index (address >> PAGE_BITS) touched by the GPU address range
+    /// [addr, addr + size). When func returns bool, returning true stops the iteration early.
+    template <typename Func>
+    static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
+        // invoke_result_t names the callable's return type. Comparing the invoke_result trait
+        // struct itself against bool is always false, which left the early break unreachable.
+        static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
+        // Page holding the last byte of the range (inclusive bound)
+        const u64 page_end = (addr + size - 1) >> PAGE_BITS;
+        for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
+            if constexpr (RETURNS_BOOL) {
+                if (func(page)) {
+                    break;
+                }
+            } else {
+                func(page);
+            }
+        }
+    }
+
+    /// Runs the Garbage Collector.
+    void RunGarbageCollector();
+
+    /// Fills image_view_ids in the image views in indices
+    void FillImageViews(DescriptorTable<TICEntry>& table,
+                        std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
+                        std::span<ImageViewId> image_view_ids);
+
+    /// Find or create an image view in the guest descriptor table
+    ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
+                               std::span<ImageViewId> cached_image_view_ids, u32 index);
+
+    /// Find or create a framebuffer with the given render target parameters
+    FramebufferId GetFramebufferId(const RenderTargets& key);
+
+    /// Refresh the contents (pixel data) of an image
+    void RefreshContents(Image& image, ImageId image_id);
+
+    /// Upload data from guest to an image
+    template <typename StagingBuffer>
+    void UploadImageContents(Image& image, StagingBuffer& staging_buffer);
+
+    /// Find or create an image view from a guest descriptor
+    [[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
+
+    /// Create a new image view from a guest descriptor
+    [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config);
+
+    /// Find or create an image from the given parameters
+    [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
+                                            RelaxedOptions options = RelaxedOptions{});
+
+    /// Find an image from the given parameters
+    [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
+                                    RelaxedOptions options);
+
+    /// Create an image from the given parameters
+    [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
+                                      RelaxedOptions options);
+
+    /// Create a new image and join perfectly matching existing images
+    /// Remove joined images from the cache
+    [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
+
+    /// Return a blit image pair from the given guest blit parameters
+    [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
+                                           const Tegra::Engines::Fermi2D::Surface& src);
+
+    /// Find or create a sampler from a guest descriptor sampler
+    [[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
+
+    /// Find or create an image view for the given color buffer index
+    [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear);
+
+    /// Find or create an image view for the depth buffer
+    [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear);
+
+    /// Find or create a view for a render target with the given image parameters
+    [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
+                                                   bool is_clear);
+
+    /// Iterates over all the images in a region calling func
+    template <typename Func>
+    void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
+
+    template <typename Func>
+    void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
+
+    template <typename Func>
+    void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
+
+    /// Iterates over all the images in a region calling func
+    template <typename Func>
+    void ForEachSparseSegment(ImageBase& image, Func&& func);
+
+    /// Find or create an image view in the given image with the passed parameters
+    [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
+
+    /// Register image in the page table
+    void RegisterImage(ImageId image);
+
+    /// Unregister image from the page table
+    void UnregisterImage(ImageId image);
+
+    /// Track CPU reads and writes for image
+    void TrackImage(ImageBase& image, ImageId image_id);
+
+    /// Stop tracking CPU reads and writes for image
+    void UntrackImage(ImageBase& image, ImageId image_id);
+
+    /// Delete image from the cache
+    void DeleteImage(ImageId image);
+
+    /// Remove image views references from the cache
+    void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);
+
+    /// Remove framebuffers using the given image views from the cache
+    void RemoveFramebuffers(std::span<const ImageViewId> removed_views);
+
+    /// Mark an image as modified from the GPU
+    void MarkModification(ImageBase& image) noexcept;
+
+    /// Synchronize image aliases, copying data if needed
+    void SynchronizeAliases(ImageId image_id);
+
+    /// Prepare an image to be used
+    void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
+
+    /// Prepare an image view to be used
+    void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
+
+    /// Execute copies from one image to the other, even if they are incompatible
+    void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies);
+
+    /// Bind an image view as render target, downloading resources preemptively if needed
+    void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
+
+    /// Create a render target from a given image and image view parameters
+    [[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage(
+        ImageId, const ImageViewInfo& view_info);
+
+    /// Returns true if the current clear parameters clear the whole image of a given image view
+    [[nodiscard]] bool IsFullClear(ImageViewId id);
+
+    Runtime& runtime;
+    VideoCore::RasterizerInterface& rasterizer;
+    Tegra::Engines::Maxwell3D& maxwell3d;
+    Tegra::Engines::KeplerCompute& kepler_compute;
+    Tegra::MemoryManager& gpu_memory;
+
+    DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
+    DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
+    std::vector<SamplerId> graphics_sampler_ids;
+    std::vector<ImageViewId> graphics_image_view_ids;
+
+    DescriptorTable<TICEntry> compute_image_table{gpu_memory};
+    DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
+    std::vector<SamplerId> compute_sampler_ids;
+    std::vector<ImageViewId> compute_image_view_ids;
+
+    RenderTargets render_targets;
+
+    std::unordered_map<TICEntry, ImageViewId> image_views;
+    std::unordered_map<TSCEntry, SamplerId> samplers;
+    std::unordered_map<RenderTargets, FramebufferId> framebuffers;
+
+    std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
+    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
+    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
+
+    std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
+
+    VAddr virtual_invalid_space{};
+
+    bool has_deleted_images = false;
+    u64 total_used_memory = 0;
+    u64 minimum_memory;
+    u64 expected_memory;
+    u64 critical_memory;
+
+    SlotVector<Image> slot_images;
+    SlotVector<ImageMapView> slot_map_views;
+    SlotVector<ImageView> slot_image_views;
+    SlotVector<ImageAlloc> slot_image_allocs;
+    SlotVector<Sampler> slot_samplers;
+    SlotVector<Framebuffer> slot_framebuffers;
+
+    // TODO: This data structure is not optimal and it should be reworked
+    std::vector<ImageId> uncommitted_downloads;
+    std::queue<std::vector<ImageId>> committed_downloads;
+
+    struct LRUItemParams {
+        using ObjectType = ImageId;
+        using TickType = u64;
+    };
+    Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;
+
+    static constexpr size_t TICKS_TO_DESTROY = 6;
+    DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
+    DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
+    DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers;
+
+    std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table;
+
+    u64 modification_tick = 0;
+    u64 frame_tick = 0;
+};
+
+} // namespace VideoCommon
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -63,14 +63,6 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32
                const u32 unswizzled_offset =
                    slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL;

-                if (const auto offset = (TO_LINEAR ? unswizzled_offset : swizzled_offset);
-                    offset >= input.size()) {
-                    // TODO(Rodrigo): This is an out of bounds access that should never happen. To
-                    // avoid crashing the emulator, break.
-                    ASSERT_MSG(false, "offset {} exceeds input size {}!", offset, input.size());
-                    break;
-                }
-
                u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset];
                const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset];

@@ -84,34 +76,107 @@ template <bool TO_LINEAR>
 void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
             u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
    switch (bytes_per_pixel) {
-    case 1:
-        return SwizzleImpl<TO_LINEAR, 1>(output, input, width, height, depth, block_height,
+#define BPP_CASE(x)                                                                                \
+    case x:                                                                                        \
+        return SwizzleImpl<TO_LINEAR, x>(output, input, width, height, depth, block_height,        \
                                         block_depth, stride_alignment);
-    case 2:
-        return SwizzleImpl<TO_LINEAR, 2>(output, input, width, height, depth, block_height,
-                                         block_depth, stride_alignment);
-    case 3:
-        return SwizzleImpl<TO_LINEAR, 3>(output, input, width, height, depth, block_height,
-                                         block_depth, stride_alignment);
-    case 4:
-        return SwizzleImpl<TO_LINEAR, 4>(output, input, width, height, depth, block_height,
-                                         block_depth, stride_alignment);
-    case 6:
-        return SwizzleImpl<TO_LINEAR, 6>(output, input, width, height, depth, block_height,
-                                         block_depth, stride_alignment);
-    case 8:
-        return SwizzleImpl<TO_LINEAR, 8>(output, input, width, height, depth, block_height,
-                                         block_depth, stride_alignment);
-    case 12:
-        return SwizzleImpl<TO_LINEAR, 12>(output, input, width, height, depth, block_height,
-                                          block_depth, stride_alignment);
-    case 16:
-        return SwizzleImpl<TO_LINEAR, 16>(output, input, width, height, depth, block_height,
-                                          block_depth, stride_alignment);
+        BPP_CASE(1)
+        BPP_CASE(2)
+        BPP_CASE(3)
+        BPP_CASE(4)
+        BPP_CASE(6)
+        BPP_CASE(8)
+        BPP_CASE(12)
+        BPP_CASE(16)
+#undef BPP_CASE
    default:
        UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel);
    }
 }
+
+/// Copies a subrect_width x subrect_height rectangle of BYTES_PER_PIXEL-sized pixels from the
+/// pitch-linear buffer unswizzled_data (rows source_pitch bytes apart) into swizzled_data,
+/// placing it at pixel offset (offset_x, offset_y) of a swizzled image swizzled_width pixels wide.
+/// block_height_bit is the log2 of the block height, which is counted in units of GOB_SIZE_Y rows.
+template <u32 BYTES_PER_PIXEL>
+void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
+                    u8* swizzled_data, const u8* unswizzled_data, u32 block_height_bit,
+                    u32 offset_x, u32 offset_y) {
+    const u32 block_height = 1U << block_height_bit;
+    // Number of GOBs needed to cover one row of the swizzled image (rounded up)
+    const u32 image_width_in_gobs =
+        (swizzled_width * BYTES_PER_PIXEL + (GOB_SIZE_X - 1)) / GOB_SIZE_X;
+    for (u32 line = 0; line < subrect_height; ++line) {
+        const u32 dst_y = line + offset_y;
+        // Byte offset contributed by the row: all complete rows of blocks above dst_y, plus the
+        // GOBs above it inside its own block
+        const u32 gob_address_y =
+            (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
+            ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
+        // Swizzle lookup for this row within its GOB
+        const auto& table = SWIZZLE_TABLE[dst_y % GOB_SIZE_Y];
+        for (u32 x = 0; x < subrect_width; ++x) {
+            const u32 dst_x = x + offset_x;
+            // Add the offset of the block column that contains the destination byte
+            const u32 gob_address =
+                gob_address_y + (dst_x * BYTES_PER_PIXEL / GOB_SIZE_X) * GOB_SIZE * block_height;
+            const u32 swizzled_offset = gob_address + table[(dst_x * BYTES_PER_PIXEL) % GOB_SIZE_X];
+            const u32 unswizzled_offset = line * source_pitch + x * BYTES_PER_PIXEL;
+
+            const u8* const source_line = unswizzled_data + unswizzled_offset;
+            u8* const dest_addr = swizzled_data + swizzled_offset;
+            std::memcpy(dest_addr, source_line, BYTES_PER_PIXEL);
+        }
+    }
+}
+
+/// Copies a line_length_in x line_count rectangle of BYTES_PER_PIXEL-sized pixels, starting at
+/// pixel (origin_x, origin_y) of the swizzled image in input (width pixels wide), into the
+/// pitch-linear buffer output, whose rows are pitch bytes apart.
+/// Here block_height is used as a shift amount (log2), not as a count.
+template <u32 BYTES_PER_PIXEL>
+void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 block_height,
+                      u32 origin_x, u32 origin_y, u8* output, const u8* input) {
+    const u32 stride = width * BYTES_PER_PIXEL;
+    // Number of GOBs needed to cover one row of the swizzled image (rounded up)
+    const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
+    // Size in bytes of one full row of blocks
+    const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);
+
+    const u32 block_height_mask = (1U << block_height) - 1;
+    const u32 x_shift = GOB_SIZE_SHIFT + block_height;
+
+    for (u32 line = 0; line < line_count; ++line) {
+        const u32 src_y = line + origin_y;
+        // Swizzle lookup for this row within its GOB
+        const auto& table = SWIZZLE_TABLE[src_y % GOB_SIZE_Y];
+
+        // Row offset: complete rows of blocks above src_y, plus the GOBs above it in its block
+        const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;
+        const u32 src_offset_y = (block_y >> block_height) * block_size +
+                                 ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
+        for (u32 column = 0; column < line_length_in; ++column) {
+            // Horizontal position is tracked in bytes, not pixels
+            const u32 src_x = (column + origin_x) * BYTES_PER_PIXEL;
+            const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift;
+
+            const u32 swizzled_offset = src_offset_y + src_offset_x + table[src_x % GOB_SIZE_X];
+            const u32 unswizzled_offset = line * pitch + column * BYTES_PER_PIXEL;
+
+            std::memcpy(output + unswizzled_offset, input + swizzled_offset, BYTES_PER_PIXEL);
+        }
+    }
+}
+
+/// Copies line_count rows of line_length_in BYTES_PER_PIXEL-sized elements from the pitch-linear
+/// buffer input (rows pitch bytes apart) to swizzled offsets in output. block_height and
+/// block_depth are used as shift amounts (log2). origin_x and origin_y are only checked through
+/// UNIMPLEMENTED_IF and otherwise ignored; the height parameter is not read.
+/// NOTE(review): the destination offset is derived from x directly while the source offset uses
+/// x * BYTES_PER_PIXEL; confirm this is intended for BYTES_PER_PIXEL > 1.
+template <u32 BYTES_PER_PIXEL>
+void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
+                         u32 block_height, u32 block_depth, u32 origin_x, u32 origin_y, u8* output,
+                         const u8* input) {
+    UNIMPLEMENTED_IF(origin_x > 0);
+    UNIMPLEMENTED_IF(origin_y > 0);
+
+    const u32 stride = width * BYTES_PER_PIXEL;
+    // Number of GOBs needed to cover one row of the image (rounded up)
+    const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
+    // Size in bytes of one full row of blocks, including the block depth
+    const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
+
+    const u32 block_height_mask = (1U << block_height) - 1;
+    const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
+
+    for (u32 line = 0; line < line_count; ++line) {
+        // Swizzle lookup for this row within its GOB
+        const auto& table = SWIZZLE_TABLE[line % GOB_SIZE_Y];
+        const u32 block_y = line / GOB_SIZE_Y;
+        // Row offset: complete rows of blocks above this line, plus the GOBs above it in its block
+        const u32 dst_offset_y =
+            (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE;
+        for (u32 x = 0; x < line_length_in; ++x) {
+            const u32 dst_offset =
+                ((x / GOB_SIZE_X) << x_shift) + dst_offset_y + table[x % GOB_SIZE_X];
+            const u32 src_offset = x * BYTES_PER_PIXEL + line * pitch;
+            std::memcpy(output + dst_offset, input + src_offset, BYTES_PER_PIXEL);
+        }
+    }
+}
 } // Anonymous namespace

 void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
@@ -131,81 +196,67 @@ void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_p
 void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
                    u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data,
                    u32 block_height_bit, u32 offset_x, u32 offset_y) {
-    const u32 block_height = 1U << block_height_bit;
-    const u32 image_width_in_gobs =
-        (swizzled_width * bytes_per_pixel + (GOB_SIZE_X - 1)) / GOB_SIZE_X;
-    for (u32 line = 0; line < subrect_height; ++line) {
-        const u32 dst_y = line + offset_y;
-        const u32 gob_address_y =
-            (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
-            ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
-        const auto& table = SWIZZLE_TABLE[dst_y % GOB_SIZE_Y];
-        for (u32 x = 0; x < subrect_width; ++x) {
-            const u32 dst_x = x + offset_x;
-            const u32 gob_address =
-                gob_address_y + (dst_x * bytes_per_pixel / GOB_SIZE_X) * GOB_SIZE * block_height;
-            const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % GOB_SIZE_X];
-            const u32 unswizzled_offset = line * source_pitch + x * bytes_per_pixel;
-
-            const u8* const source_line = unswizzled_data + unswizzled_offset;
-            u8* const dest_addr = swizzled_data + swizzled_offset;
-            std::memcpy(dest_addr, source_line, bytes_per_pixel);
-        }
+    switch (bytes_per_pixel) {
+#define BPP_CASE(x)                                                                                \
+    case x:                                                                                        \
+        return SwizzleSubrect<x>(subrect_width, subrect_height, source_pitch, swizzled_width,      \
+                                 swizzled_data, unswizzled_data, block_height_bit, offset_x,       \
+                                 offset_y);
+        BPP_CASE(1)
+        BPP_CASE(2)
+        BPP_CASE(3)
+        BPP_CASE(4)
+        BPP_CASE(6)
+        BPP_CASE(8)
+        BPP_CASE(12)
+        BPP_CASE(16)
+#undef BPP_CASE
+    default:
+        UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel);
    }
 }

 void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel,
                      u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) {
-    const u32 stride = width * bytes_per_pixel;
-    const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
-    const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);
-
-    const u32 block_height_mask = (1U << block_height) - 1;
-    const u32 x_shift = GOB_SIZE_SHIFT + block_height;
-
-    for (u32 line = 0; line < line_count; ++line) {
-        const u32 src_y = line + origin_y;
-        const auto& table = SWIZZLE_TABLE[src_y % GOB_SIZE_Y];
-
-        const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;
-        const u32 src_offset_y = (block_y >> block_height) * block_size +
-                                 ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
-        for (u32 column = 0; column < line_length_in; ++column) {
-            const u32 src_x = (column + origin_x) * bytes_per_pixel;
-            const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift;
-
-            const u32 swizzled_offset = src_offset_y + src_offset_x + table[src_x % GOB_SIZE_X];
-            const u32 unswizzled_offset = line * pitch + column * bytes_per_pixel;
-
-            std::memcpy(output + unswizzled_offset, input + swizzled_offset, bytes_per_pixel);
-        }
+    switch (bytes_per_pixel) {
+#define BPP_CASE(x)                                                                                \
+    case x:                                                                                        \
+        return UnswizzleSubrect<x>(line_length_in, line_count, pitch, width, block_height,         \
+                                   origin_x, origin_y, output, input);
+        BPP_CASE(1)
+        BPP_CASE(2)
+        BPP_CASE(3)
+        BPP_CASE(4)
+        BPP_CASE(6)
+        BPP_CASE(8)
+        BPP_CASE(12)
+        BPP_CASE(16)
+#undef BPP_CASE
+    default:
+        UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel);
    }
 }

 void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height,
                         u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x,
                         u32 origin_y, u8* output, const u8* input) {
-    UNIMPLEMENTED_IF(origin_x > 0);
-    UNIMPLEMENTED_IF(origin_y > 0);
-
-    const u32 stride = width * bytes_per_pixel;
-    const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
-    const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
-
-    const u32 block_height_mask = (1U << block_height) - 1;
-    const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
-
-    for (u32 line = 0; line < line_count; ++line) {
-        const auto& table = SWIZZLE_TABLE[line % GOB_SIZE_Y];
-        const u32 block_y = line / GOB_SIZE_Y;
-        const u32 dst_offset_y =
-            (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE;
-        for (u32 x = 0; x < line_length_in; ++x) {
-            const u32 dst_offset =
-                ((x / GOB_SIZE_X) << x_shift) + dst_offset_y + table[x % GOB_SIZE_X];
-            const u32 src_offset = x * bytes_per_pixel + line * pitch;
-            std::memcpy(output + dst_offset, input + src_offset, bytes_per_pixel);
-        }
+    switch (bytes_per_pixel) {
+#define BPP_CASE(x)                                                                                \
+    case x:                                                                                        \
+        return SwizzleSliceToVoxel<x>(line_length_in, line_count, pitch, width, height,            \
+                                      block_height, block_depth, origin_x, origin_y, output,       \
+                                      input);
+        BPP_CASE(1)
+        BPP_CASE(2)
+        BPP_CASE(3)
+        BPP_CASE(4)
+        BPP_CASE(6)
+        BPP_CASE(8)
+        BPP_CASE(12)
+        BPP_CASE(16)
+#undef BPP_CASE
+    default:
+        UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel);
    }
 }

@@ -228,7 +279,7 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32
            u8* dest_addr = swizzle_data + swizzled_offset;
            count++;

-            std::memcpy(dest_addr, source_line, 1);
+            *dest_addr = *source_line;
        }
    }
 }
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -159,7 +159,7 @@ static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
        return {raw, raw};
    } else {
        const Tegra::Texture::TextureHandle handle{raw};
-        return {handle.tic_id, via_header_index ? handle.tic_id : handle.tsc_id};
+        return {handle.tic_id, handle.tsc_id};
    }
 }

--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -37,7 +37,8 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
 namespace VideoCore {

 std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
-    const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue();
+    const auto nvdec_value = Settings::values.nvdec_emulation.GetValue();
+    const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off;
    const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
    auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec);
    auto context = emu_window.CreateSharedContext();
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -368,18 +368,23 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
    };
    SetNext(next, demote);

+    // Declared at function scope so the pNext chain entry stays valid after the block below.
    VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
-    if (is_float16_supported) {
+    if (is_int8_supported || is_float16_supported) {
        float16_int8 = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR,
            .pNext = nullptr,
-            .shaderFloat16 = true,
-            .shaderInt8 = false,
+            .shaderFloat16 = is_float16_supported,
+            .shaderInt8 = is_int8_supported,
        };
        SetNext(next, float16_int8);
-    } else {
+    }
+    if (!is_float16_supported) {
        LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively");
    }
+    if (!is_int8_supported) {
+        LOG_INFO(Render_Vulkan, "Device doesn't support int8 natively");
+    }

    if (!nv_viewport_swizzle) {
        LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles");
@@ -836,6 +841,8 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
    bool has_khr_shader_float16_int8{};
    bool has_khr_workgroup_memory_explicit_layout{};
    bool has_khr_pipeline_executable_properties{};
+    bool has_khr_image_format_list{};
+    bool has_khr_swapchain_mutable_format{};
    bool has_ext_subgroup_size_control{};
    bool has_ext_transform_feedback{};
    bool has_ext_custom_border_color{};
@@ -885,6 +892,9 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
        test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false);
        test(has_khr_workgroup_memory_explicit_layout,
             VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false);
+        test(has_khr_image_format_list, VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, false);
+        test(has_khr_swapchain_mutable_format, VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME,
+             false);
        test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false);
        if (Settings::values.enable_nsight_aftermath) {
            test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
@@ -909,6 +919,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {

        physical.GetFeatures2KHR(features);
        is_float16_supported = float16_int8_features.shaderFloat16;
+        is_int8_supported = float16_int8_features.shaderInt8;
        extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
    }
    if (has_ext_subgroup_size_control) {
@@ -1062,6 +1073,11 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
            khr_pipeline_executable_properties = true;
        }
    }
+    if (has_khr_image_format_list && has_khr_swapchain_mutable_format) {
+        extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME);
+        extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME);
+        khr_swapchain_mutable_format = true;
+    }
    if (khr_push_descriptor) {
        VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor;
        push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR;
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -139,11 +139,16 @@ public:
        return is_optimal_astc_supported;
    }

-    /// Returns true if the device supports float16 natively
+    /// Returns true if the device supports float16 natively.
    bool IsFloat16Supported() const {
        return is_float16_supported;
    }

+    /// Returns true if the device supports int8 natively.
+    bool IsInt8Supported() const {
+        return is_int8_supported;
+    }
+
    /// Returns true if the device warp size can potentially be bigger than guest's warp size.
    bool IsWarpSizePotentiallyBiggerThanGuest() const {
        return is_warp_potentially_bigger;
@@ -219,6 +224,11 @@ public:
        return khr_pipeline_executable_properties;
    }

+    /// Returns true if VK_KHR_swapchain_mutable_format is enabled.
+    bool IsKhrSwapchainMutableFormatEnabled() const {
+        return khr_swapchain_mutable_format;
+    }
+
    /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
    bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const {
        return khr_workgroup_memory_explicit_layout;
@@ -367,7 +377,8 @@ private:
    u64 device_access_memory{};                 ///< Total size of device local memory in bytes.
    u32 max_push_descriptors{};                 ///< Maximum number of push descriptors
    bool is_optimal_astc_supported{};           ///< Support for native ASTC.
-    bool is_float16_supported{};                ///< Support for float16 arithmetics.
+    bool is_float16_supported{};                ///< Support for float16 arithmetic.
+    bool is_int8_supported{};                   ///< Support for int8 arithmetic.
    bool is_warp_potentially_bigger{};          ///< Host warp size can be bigger than guest.
    bool is_formatless_image_load_supported{};  ///< Support for shader image read without format.
    bool is_depth_bounds_supported{};           ///< Support for depth bounds.
@@ -384,6 +395,7 @@ private:
    bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts.
    bool khr_push_descriptor{};                  ///< Support for VK_KHR_push_descritor.
    bool khr_pipeline_executable_properties{};   ///< Support for executable properties.
+    bool khr_swapchain_mutable_format{};         ///< Support for VK_KHR_swapchain_mutable_format.
    bool ext_index_type_uint8{};                 ///< Support for VK_EXT_index_type_uint8.
    bool ext_sampler_filter_minmax{};            ///< Support for VK_EXT_sampler_filter_minmax.
    bool ext_depth_range_unrestricted{};         ///< Support for VK_EXT_depth_range_unrestricted.
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -228,7 +228,9 @@ void MemoryCommit::Release() {

 MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_)
    : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()},
-      export_allocations{export_allocations_} {}
+      export_allocations{export_allocations_},
+      buffer_image_granularity{
+          device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {}

 MemoryAllocator::~MemoryAllocator() = default;

@@ -258,7 +260,9 @@ MemoryCommit MemoryAllocator::Commit(const vk::Buffer& buffer, MemoryUsage usage
 }

 MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage) {
-    auto commit = Commit(device.GetLogical().GetImageMemoryRequirements(*image), usage);
+    VkMemoryRequirements requirements = device.GetLogical().GetImageMemoryRequirements(*image);
+    requirements.size = Common::AlignUp(requirements.size, buffer_image_granularity);
+    auto commit = Commit(requirements, usage);
    image.BindMemory(commit.Memory(), commit.Offset());
    return commit;
 }
--- a/Show More
+++ b/Show More