Compare commits


83 Commits

Author SHA1 Message Date
ameerj
db1c4b125f vulkan_debug_callback: Ignore InvalidCommandBuffer-VkDescriptorSet errors
This validation error is spammed on some titles, asserting that VkDescriptorSet 0x0[] was destroyed.

This is likely a validation layer bug when using VK_KHR_push_descriptor, which can avoid using traditional VkDescriptorSet. It should be safe to ignore for now.
2021-09-13 23:08:59 -04:00
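A minimal sketch of the filtering approach described above, assuming a standard VK_EXT_debug_utils callback; the matched substring is taken from the commit title and the skip logic is illustrative, not yuzu's exact vulkan_debug_callback code:

#include <string_view>
#include <vulkan/vulkan.h>

// Skip known-noisy validation messages before forwarding anything to the logger.
VKAPI_ATTR VkBool32 VKAPI_CALL DebugCallback(
    VkDebugUtilsMessageSeverityFlagBitsEXT severity, VkDebugUtilsMessageTypeFlagsEXT type,
    const VkDebugUtilsMessengerCallbackDataEXT* data, void* user_data) {
    const std::string_view message{data->pMessage};
    // Assumed substring of the spammed error named in the commit title above.
    if (message.find("InvalidCommandBuffer-VkDescriptorSet") != std::string_view::npos) {
        return VK_FALSE; // Ignore; a validation callback should never abort the call.
    }
    // ... log the remaining messages as usual ...
    return VK_FALSE;
}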
Morph
d86a9b9a4b Merge pull request #6943 from FernandoS27/omae-wa-mou-shindeiru
Vulkan: Disable VK_EXT_SAMPLER_FILTER_MINMAX in GCN AMD
2021-09-13 17:33:15 -04:00
Fernando Sahmkow
7a712da2b3 Vulkan: Disable VK_EXT_SAMPLER_FILTER_MINMAX in GCN AMD since it's broken. 2021-09-13 23:29:57 +02:00
Morph
62e88d0e74 Merge pull request #7006 from FernandoS27/a-motherfucking-driver
Vulkan: Blacklist Int8Float16 Extension on AMD on driver 21.9.1
2021-09-13 17:25:56 -04:00
Mai M
edf3da346f Merge pull request #7005 from Morph1984/enum-bitwise-shift-ops
common_funcs: Add enum flag bitwise shift operator overloads
2021-09-13 17:21:22 -04:00
Morph
fde9b84b21 Merge pull request #6944 from FernandoS27/dear-drunk-me
Vulkan/Descriptors: Increase sets per pool on AMD proprietary driver.
2021-09-13 17:20:07 -04:00
Fernando Sahmkow
e7c8a0bb23 Vulkan: Blacklist Int8Float16 Extension on AMD on driver 21.9.1 2021-09-13 23:17:37 +02:00
Fernando S
1bb28dfe2c Merge pull request #7001 from ameerj/wario-fix
vk_rasterizer: Fix dynamic StencilOp updating when two faces are enabled
2021-09-13 23:16:59 +02:00
Fernando Sahmkow
e7ca37b1e5 Vulkan/Descriptors: Increase sets per pool on AMD proprietary driver. 2021-09-13 23:09:18 +02:00
Morph
3512cae623 common_funcs: Add enum flag bitwise shift operator overloads
This adds bitwise shift operator overloads (<<, >>, <<=, >>=) in the macro DECLARE_ENUM_FLAG_OPERATORS(type)
2021-09-13 16:01:20 -04:00
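For reference, a short usage sketch of the new operators; PageFlags is a hypothetical enum, and the macro is assumed to come from common/common_funcs.h as in the diff further down:

#include <type_traits>
#include "common/common_funcs.h" // provides DECLARE_ENUM_FLAG_OPERATORS

// Hypothetical flag enum, purely to illustrate the added << and <<= overloads.
enum class PageFlags : unsigned {
    None = 0,
    Read = 1u << 0,
    Write = 1u << 1,
};
DECLARE_ENUM_FLAG_OPERATORS(PageFlags)

constexpr PageFlags base = PageFlags::Read;
constexpr PageFlags shifted = base << PageFlags{1}; // 0b01 shifted left once -> 0b10
static_assert(shifted == PageFlags::Write);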
Ameer J
d180fd7c36 Merge pull request #7000 from Morph1984/create-dir-comment
FS: Mark recursive CreateDirectory as inaccurate and temporary
2021-09-12 21:06:52 -04:00
Mai M
e4318d2207 Merge pull request #7002 from ameerj/vk-state-unused
vk_state_tracker: Remove unused function
2021-09-12 17:31:56 -04:00
ameerj
678f73069f vk_rasterizer: Fix dynamic StencilOp updating when two faces are enabled
This function was incorrectly using the stencil_two_side_enable register when dynamically updating the StencilOp.
2021-09-12 16:19:12 -04:00
ameerj
8e289ade15 vk_state_tracker: Remove unused function 2021-09-12 15:28:24 -04:00
Morph
727f607e00 FS: Mark recursive CreateDirectory as inaccurate and temporary 2021-09-12 14:06:01 -04:00
Morph
9248442bb2 Merge pull request #6948 from ameerj/amd-warp-fix
shaders: Fix warp instructions on 64-thread warp devices
2021-09-12 13:53:29 -04:00
Morph
4ab549e62a Merge pull request #6975 from ogniK5377/acc-async-ctx
account: EnsureTokenIdCacheAsync
2021-09-12 12:03:10 -04:00
Morph
f0f416e85c Merge pull request #6974 from ogniK5377/fs-recursive-createdir
FS: Recursively create directories for CreateDirectory
2021-09-12 12:02:39 -04:00
Morph
9907302465 Merge pull request #6997 from ameerj/stop-emulation-confirmation
main: Apply confirm exit setting in exit locked scenarios
2021-09-12 12:01:57 -04:00
Morph
3428232bca Merge pull request #6992 from german77/brains
hid/am: Stub SetTouchScreenConfiguration and implement GetNotificationStorageChannelEvent
2021-09-12 12:01:43 -04:00
Morph
74030eb427 Merge pull request #6987 from Morph1984/common-error
common: Move error handling functions out of common_funcs
2021-09-12 12:01:23 -04:00
Morph
47b6f522bd Merge pull request #6986 from Morph1984/version-update
api_version: Update and add AtmosphereTargetFirmware
2021-09-12 12:01:11 -04:00
ameerj
188cf1aed2 main: Apply confirm exit setting in exit locked scenarios
Some titles set an exit lock through HLE, which prompts an exit confirmation when stopping emulation if the system is locked.
This change allows bypassing this confirmation if the setting to confirm exits has been disabled by the user.
2021-09-12 00:31:32 -04:00
Morph
e67463df24 shader_environment: Add missing <algorithm> include 2021-09-11 17:19:16 -04:00
Morph
63b4c8f9f7 vk_descriptor_pool: Add missing <algorithm> include 2021-09-11 17:19:16 -04:00
Morph
76abf55f25 slot_vector: Add missing <algorithm> include 2021-09-11 17:19:15 -04:00
Morph
554c46d186 video_core/memory_manager: Add missing <algorithm> include 2021-09-11 17:19:15 -04:00
Morph
6f307f1521 kernel: Add missing <functional> include 2021-09-11 17:19:15 -04:00
Morph
4a6a73e887 file_sys/kernel_executable: Add missing <string> include 2021-09-11 17:19:14 -04:00
Morph
ae028ddf22 codec: Add missing <string_view> include 2021-09-11 17:19:14 -04:00
Morph
eb1e3f19bb common_funcs: Replace <algorithm> with <iterator> 2021-09-11 17:19:14 -04:00
Morph
290afc00d3 common: Move error handling to error.cpp/h
This allows us to avoid implicitly including <string> every time common_funcs.h is included.
2021-09-11 17:19:14 -04:00
Fernando S
be4e192903 Merge pull request #6846 from ameerj/nvdec-gpu-decode
nvdec: Add GPU video decoding for all capable drivers and platforms
2021-09-11 23:11:32 +02:00
Fernando S
82c867164b Merge pull request #6901 from ameerj/vk-clear-bits
vk_rasterizer: Only clear depth/stencil buffers when specified in attachment aspect mask
2021-09-11 22:36:22 +02:00
Fernando S
ec6490f5ad Merge pull request #6941 from ameerj/swapchain-srgb
vk_swapchain: Prefer linear swapchain format when presenting sRGB images
2021-09-11 22:36:03 +02:00
Fernando S
472aad69db Merge pull request #6953 from ameerj/anv-semaphore
renderer_vulkan: Wait on present semaphore at queue submit
2021-09-11 22:35:52 +02:00
Fernando S
55854c807d Merge pull request #6981 from ameerj/nvflinger-hb-format
nvflinger: Use external surface format for framebuffer creation
2021-09-11 22:35:25 +02:00
german77
9bddcdac69 am: Implement GetNotificationStorageChannelEvent 2021-09-10 12:24:50 -05:00
german77
a7bbd37f81 hid: Stub SetTouchScreenConfiguration 2021-09-10 12:24:28 -05:00
Morph
c9710f6c78 api_version: Update and add AtmosphereTargetFirmware 2021-09-10 01:10:47 -04:00
bunnei
7e9163779d Merge pull request #6962 from vonchenplus/spirv_support_legacy_attribute
renderer_vulkan: SPIR-V support for GLSL legacy attributes
2021-09-08 14:04:44 -07:00
Chloe
005b0e68db Addressed issues
Co-authored-by: Mai M. <mathew1800@gmail.com>
2021-09-09 03:00:08 +10:00
Chloe Marcec
543081e4a1 Mark is_complete as atomic 2021-09-09 00:10:52 +10:00
Chloe Marcec
89958e27aa Addressed issues 2021-09-09 00:09:04 +10:00
Fernando S
6b16f7807e Merge pull request #6980 from vonchenplus/fix_blend_equation_error
Fix blend equation enum error
2021-09-08 11:50:26 +02:00
Ameer J
eb1ba45c39 Merge pull request #6971 from bunnei/buffer-queue-kevent
core: hle: service: buffer_queue: Improve management of KEvent.
2021-09-08 00:34:36 -04:00
Feng Chen
b1e655f898 Detail adjustment 2021-09-08 10:30:00 +08:00
Feng Chen
bbc1800c1b Detail adjustment 2021-09-08 09:53:10 +08:00
Feng Chen
e5ca733722 Re-implement get unused location 2021-09-07 13:22:52 +08:00
Feng Chen
9cdf2383e9 Move attribute related definitions to spirv anonymous namespace 2021-09-07 12:34:35 +08:00
ameerj
9e2bf49677 nvflinger: Use external surface format for framebuffer creation
The format member of the IGBPBuffer may not always specify the correct desired format. Using the external format member ensures a valid format is provided when creating the framebuffer.

Fixes homebrew using the wrong framebuffer format.
2021-09-06 23:14:31 -04:00
Ameer J
ab73787d8f Merge pull request #6977 from Moonlacer/master
Second part of Golden's PR #6976
2021-09-06 22:58:23 -04:00
Ameer J
743428e025 Merge pull request #6976 from goldenx86/patch-2
Rename all shader cache strings to pipeline cache
2021-09-06 22:58:03 -04:00
Feng Chen
0292374807 Fix blend equation enum error 2021-09-07 10:12:09 +08:00
Moonlacer
bdd153bc0d Second part of Golden's PR 2021-09-06 15:25:40 -05:00
Matías Locatti
296fa4e06e Rename all shader cache references to pipeline cache
After Hades, both OpenGL and Vulkan use a pipeline cache instead of single stages of the graphics pipeline. Renamed the Remove menu entries to match.
2021-09-06 15:53:04 -03:00
Chloe Marcec
9141816b10 address name shadowing with system 2021-09-06 22:13:51 +10:00
Chloe Marcec
4e2aa50cef account: EnsureTokenIdCacheAsync
Closes #2547, #6946
2021-09-06 21:16:21 +10:00
bunnei
51ccc29cdd Merge pull request #6965 from bunnei/cpu_manager_jthread
core: cpu_manager: Use jthread.
2021-09-06 03:49:14 -07:00
Chloe Marcec
0b891c9245 FS: Recursively create directories for CreateDirectory
Originally we only created the parent directory; this caused issues when creating directories that also contain subdirectories, e.g. `/Folder1/Folder2`.

This allows the Ultimate Mod Manager homebrew to at least boot.
2021-09-06 19:35:55 +10:00
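A simplified, self-contained sketch of the recursive-creation idea; the CreateSubdirectory name is modeled on yuzu's VFS, but this is illustrative only, and the actual change is in the file_sys diff further down:

#include <string>
#include <vector>

// Split "/Folder1/Folder2" into {"Folder1", "Folder2"}, skipping empty components.
inline std::vector<std::string> SplitComponents(const std::string& path) {
    std::vector<std::string> out;
    std::string current;
    for (const char c : path) {
        if (c == '/') {
            if (!current.empty()) {
                out.push_back(current);
                current.clear();
            }
        } else {
            current += c;
        }
    }
    if (!current.empty()) {
        out.push_back(current);
    }
    return out;
}

// Create every level of the path instead of only the final directory.
template <typename VirtualDir>
bool CreateDirectoryRecursive(VirtualDir& backing, const std::string& path) {
    std::string relative;
    for (const std::string& component : SplitComponents(path)) {
        relative += '/' + component;
        if (backing.CreateSubdirectory(relative) == nullptr) {
            return false; // The real service maps this to an error ResultCode.
        }
    }
    return true;
}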
Feng Chen
1de9e4e121 Dynamically get unused location 2021-09-06 10:46:03 +08:00
Feng Chen
d994466a08 Implement input and output fixed fnc textures 2021-09-06 10:36:45 +08:00
bunnei
25a97e0139 core: cpu_manager: Use jthread. 2021-09-03 19:05:41 -07:00
Feng Chen
a7bbaa4897 Rename parameters 2021-09-03 23:52:20 +08:00
Feng Chen
cf26f375ff Fix create GraphicsPipelines crash 2021-09-03 22:55:53 +08:00
ameerj
7d854fbdb0 renderer_vulkan: Wait on present semaphore at queue submit
The present semaphore is being signalled by the call to acquire the
swapchain image. This semaphore is meant to be waited on when rendering
to the swapchain image. Currently it is waited on when presenting, but
moving its usage to be waited on in the command buffer submission allows
for proper usage of this semaphore.

Fixes the device lost when launching titles on the Intel Linux Mesa driver.
2021-09-02 13:13:20 -04:00
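A generic Vulkan sketch of the change described above, assuming the usual acquire/submit/present flow (this is not yuzu's scheduler code): the semaphore signalled by vkAcquireNextImageKHR is waited on in VkSubmitInfo instead of in VkPresentInfoKHR.

#include <vulkan/vulkan.h>

void SubmitFrame(VkQueue queue, VkCommandBuffer cmdbuf,
                 VkSemaphore present_semaphore, VkSemaphore render_semaphore) {
    const VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    const VkSubmitInfo submit_info{
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .waitSemaphoreCount = 1,
        .pWaitSemaphores = &present_semaphore, // previously waited on at vkQueuePresentKHR
        .pWaitDstStageMask = &wait_stage,
        .commandBufferCount = 1,
        .pCommandBuffers = &cmdbuf,
        .signalSemaphoreCount = 1,
        .pSignalSemaphores = &render_semaphore,
    };
    vkQueueSubmit(queue, 1, &submit_info, VK_NULL_HANDLE);
    // vkQueuePresentKHR then waits only on render_semaphore.
}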
Feng Chen
1e2a89d306 Add input/output location 2021-09-02 23:34:51 +08:00
ameerj
d956fb3c7c emit_glsl_warp: Fix shuffle ops for 64-thread warp sizes 2021-08-31 16:11:25 -04:00
ameerj
5b45dfe971 emit_glsl_warp: Fix ballot related ops for 64-thread warp sizes 2021-08-31 16:11:25 -04:00
ameerj
a5d9dcf3d9 emit_spirv_warp: Fix shuffle ops for 64-thread warp sizes 2021-08-31 13:40:39 -04:00
ameerj
95213270ef emit_spirv_warp: Fix ballot related ops for 64-thread warp sizes 2021-08-31 13:40:12 -04:00
Feng Chen
73b11f390e Add colorfront and txtcoord support 2021-09-01 00:07:25 +08:00
ameerj
27f8f3333f vulkan_device: Enable VK_KHR_swapchain_mutable_format if available
Silences validation errors when creating sRGB image views of linear swapchain images
2021-08-29 02:03:36 -04:00
ameerj
3c65c8580f vk_swapchain: Prefer linear swapchain format when presenting sRGB images
Fixes broken sRGB when presenting from a secondary GPU.
2021-08-29 02:03:35 -04:00
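An illustrative surface-format selection sketch for the preference described above (generic Vulkan, not yuzu's vk_swapchain code; the choice of UNORM formats and the fallback are assumptions):

#include <vector>
#include <vulkan/vulkan.h>

// When presenting sRGB images, prefer a linear (UNORM) swapchain format if the
// surface offers one; otherwise fall back to the first advertised format.
VkSurfaceFormatKHR ChooseSwapchainFormat(const std::vector<VkSurfaceFormatKHR>& formats,
                                         bool presenting_srgb) {
    for (const VkSurfaceFormatKHR& format : formats) {
        const bool is_linear = format.format == VK_FORMAT_B8G8R8A8_UNORM ||
                               format.format == VK_FORMAT_R8G8B8A8_UNORM;
        if (presenting_srgb && is_linear) {
            return format;
        }
    }
    return formats.front(); // Assumes the surface reports at least one format.
}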
ameerj
e0397f00d0 vk_rasterizer: Only clear depth and stencil buffers when set in attachment aspect mask
Silences validation errors for clearing the depth/stencil buffers of framebuffer attachments that were not specified to have depth/stencil usage.
2021-08-21 02:37:15 -04:00
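An illustrative guard matching the behavior described above (generic Vulkan helper, not yuzu's vk_rasterizer code):

#include <vulkan/vulkan.h>

// Only request a depth/stencil clear for aspects the attachment actually has.
void ClearDepthStencil(VkCommandBuffer cmdbuf, VkImageAspectFlags aspect_mask,
                       const VkClearRect& clear_rect) {
    VkImageAspectFlags clear_aspects = 0;
    if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) {
        clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
    }
    if (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) {
        clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
    }
    if (clear_aspects == 0) {
        return; // Nothing to clear; avoids the validation errors noted above.
    }
    const VkClearAttachment attachment{
        .aspectMask = clear_aspects,
        .colorAttachment = 0,
        .clearValue = {.depthStencil = {.depth = 1.0f, .stencil = 0}},
    };
    vkCmdClearAttachments(cmdbuf, 1, &attachment, 1, &clear_rect);
}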
ameerj
b384129c63 h264: Lower max_num_ref_frames
GPU decoding seems to be more picky when it comes to the maximum number of reference frames.
2021-08-16 14:40:53 -04:00
ameerj
cd016d3cb5 configure_graphics: Add GPU nvdec decoding as an option
Some system configurations may see visual regressions or lower performance using GPU decoding compared to CPU decoding. This setting provides the option for users to specify their decoding preference.

Co-Authored-By: yzct12345 <87620833+yzct12345@users.noreply.github.com>
2021-08-16 14:40:53 -04:00
ameerj
a832aa699f codec: Improve libav memory alloc and cleanup 2021-08-16 14:40:53 -04:00
ameerj
bc3efb79cc codec: Fallback to CPU decoding if no compatible GPU format is found 2021-08-16 14:40:53 -04:00
lat9nq
92bc51b66a cmake: Add VDPAU and NVDEC support to FFmpeg
Adds {h264_,vp9_}{nvdec,vdpau} hwaccels.
2021-08-16 14:40:52 -04:00
ameerj
356e10898f codec: Replace deprecated av_init_packet usage 2021-08-12 01:28:01 -04:00
ameerj
0be4e402e2 cmake: Always find LIBVA, update windows FFmpeg version
Allows the use of VAAPI gpu decoders on system installed ffmpeg as well.
2021-08-12 01:28:01 -04:00
ameerj
659039ca6d nvdec: Implement GPU accelerated decoding for all platforms
Supplements the VAAPI Intel GPU decoder by implementing the D3D11VA decoder for Windows, and CUVID/VDPAU for Nvidia and AMD drivers on Linux, respectively.
2021-08-12 01:28:01 -04:00
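A hedged sketch of the backend-selection idea using FFmpeg's public hardware-device API (the priority order is an assumption; yuzu's actual decoder wiring lives in its codec classes and differs in detail):

extern "C" {
#include <libavutil/hwcontext.h>
}

// Try the hardware decoders named in the commit in a fixed priority order and
// return the first device context that can be created.
AVBufferRef* CreateHwDevice() {
    static constexpr AVHWDeviceType kPriority[] = {
#ifdef _WIN32
        AV_HWDEVICE_TYPE_D3D11VA,
#endif
        AV_HWDEVICE_TYPE_CUDA,  // NVDEC/CUVID path on Nvidia
        AV_HWDEVICE_TYPE_VDPAU,
        AV_HWDEVICE_TYPE_VAAPI,
    };
    for (const AVHWDeviceType type : kPriority) {
        AVBufferRef* device = nullptr;
        if (av_hwdevice_ctx_create(&device, type, nullptr, nullptr, 0) == 0) {
            return device; // Caller releases with av_buffer_unref().
        }
    }
    return nullptr; // No compatible GPU backend; fall back to CPU decoding.
}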
66 changed files with 1025 additions and 347 deletions

View File

@@ -518,6 +518,10 @@ set(FFmpeg_COMPONENTS
avutil
swscale)
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
Include(FindPkgConfig REQUIRED)
pkg_check_modules(LIBVA libva)
endif()
if (NOT YUZU_USE_BUNDLED_FFMPEG)
# Use system installed FFmpeg
find_package(FFmpeg QUIET COMPONENTS ${FFmpeg_COMPONENTS})
@@ -540,6 +544,9 @@ endif()
if (YUZU_USE_BUNDLED_FFMPEG)
if (NOT WIN32)
# TODO(lat9nq): Move this to externals/ffmpeg/CMakeLists.txt (and move externals/ffmpeg to
# externals/ffmpeg/ffmpeg)
# Build FFmpeg from externals
message(STATUS "Using FFmpeg from externals")
@@ -579,20 +586,23 @@ if (YUZU_USE_BUNDLED_FFMPEG)
CACHE PATH "Paths to FFmpeg libraries" FORCE)
endforeach()
set(FFmpeg_INCLUDE_DIR
"${FFmpeg_PREFIX};${FFmpeg_BUILD_DIR}"
CACHE PATH "Path to FFmpeg headers" FORCE)
Include(FindPkgConfig REQUIRED)
pkg_check_modules(LIBVA libva)
pkg_check_modules(CUDA cuda)
pkg_check_modules(FFNVCODEC ffnvcodec)
pkg_check_modules(VDPAU vdpau)
set(FFmpeg_HWACCEL_LIBRARIES)
set(FFmpeg_HWACCEL_FLAGS)
set(FFmpeg_HWACCEL_INCLUDE_DIRS)
set(FFmpeg_HWACCEL_LDFLAGS)
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
Include(FindPkgConfig REQUIRED)
pkg_check_modules(LIBVA libva)
endif()
if(LIBVA_FOUND)
pkg_check_modules(LIBDRM libdrm REQUIRED)
find_package(X11 REQUIRED)
pkg_check_modules(LIBVA-DRM libva-drm REQUIRED)
pkg_check_modules(LIBVA-X11 libva-x11 REQUIRED)
set(FFmpeg_LIBVA_LIBRARIES
list(APPEND FFmpeg_HWACCEL_LIBRARIES
${LIBDRM_LIBRARIES}
${X11_LIBRARIES}
${LIBVA-DRM_LIBRARIES}
@@ -602,11 +612,56 @@ if (YUZU_USE_BUNDLED_FFMPEG)
--enable-hwaccel=h264_vaapi
--enable-hwaccel=vp9_vaapi
--enable-libdrm)
list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS
${LIBDRM_INCLUDE_DIRS}
${X11_INCLUDE_DIRS}
${LIBVA-DRM_INCLUDE_DIRS}
${LIBVA-X11_INCLUDE_DIRS}
${LIBVA_INCLUDE_DIRS}
)
message(STATUS "VA-API found")
else()
set(FFmpeg_HWACCEL_FLAGS --disable-vaapi)
endif()
if (FFNVCODEC_FOUND AND CUDA_FOUND)
list(APPEND FFmpeg_HWACCEL_FLAGS
--enable-cuvid
--enable-ffnvcodec
--enable-nvdec
--enable-hwaccel=h264_nvdec
--enable-hwaccel=vp9_nvdec
--extra-cflags=-I${CUDA_INCLUDE_DIRS}
)
list(APPEND FFmpeg_HWACCEL_LIBRARIES
${FFNVCODEC_LIBRARIES}
${CUDA_LIBRARIES}
)
list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS
${FFNVCODEC_INCLUDE_DIRS}
${CUDA_INCLUDE_DIRS}
)
list(APPEND FFmpeg_HWACCEL_LDFLAGS
${FFNVCODEC_LDFLAGS}
${CUDA_LDFLAGS}
)
message(STATUS "ffnvcodec libraries version ${FFNVCODEC_VERSION} found")
endif()
if (VDPAU_FOUND)
list(APPEND FFmpeg_HWACCEL_FLAGS
--enable-vdpau
--enable-hwaccel=h264_vdpau
--enable-hwaccel=vp9_vdpau
)
list(APPEND FFmpeg_HWACCEL_LIBRARIES ${VDPAU_LIBRARIES})
list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS ${VDPAU_INCLUDE_DIRS})
list(APPEND FFmpeg_HWACCEL_LDFLAGS ${VDPAU_LDFLAGS})
message(STATUS "vdpau libraries version ${VDPAU_VERSION} found")
else()
list(APPEND FFmpeg_HWACCEL_FLAGS --disable-vdpau)
endif()
# `configure` parameters builds only exactly what yuzu needs from FFmpeg
# `--disable-vdpau` is needed to avoid linking issues
add_custom_command(
@@ -624,7 +679,6 @@ if (YUZU_USE_BUNDLED_FFMPEG)
--disable-network
--disable-postproc
--disable-swresample
--disable-vdpau
--enable-decoder=h264
--enable-decoder=vp9
--cc="${CMAKE_C_COMPILER}"
@@ -653,15 +707,26 @@ if (YUZU_USE_BUNDLED_FFMPEG)
${FFmpeg_BUILD_DIR}
)
set(FFmpeg_INCLUDE_DIR
"${FFmpeg_PREFIX};${FFmpeg_BUILD_DIR};${FFmpeg_HWACCEL_INCLUDE_DIRS}"
CACHE PATH "Path to FFmpeg headers" FORCE)
set(FFmpeg_LDFLAGS
"${FFmpeg_HWACCEL_LDFLAGS}"
CACHE STRING "FFmpeg linker flags" FORCE)
# ALL makes this custom target build every time
# but it won't actually build if the DEPENDS parameter is up to date
add_custom_target(ffmpeg-configure ALL DEPENDS ${FFmpeg_MAKEFILE})
add_custom_target(ffmpeg-build ALL DEPENDS ${FFmpeg_BUILD_LIBRARIES} ffmpeg-configure)
link_libraries(${FFmpeg_LIBVA_LIBRARIES})
set(FFmpeg_LIBRARIES ${FFmpeg_LIBVA_LIBRARIES} ${FFmpeg_BUILD_LIBRARIES}
set(FFmpeg_LIBRARIES ${FFmpeg_BUILD_LIBRARIES} ${FFmpeg_HWACCEL_LIBRARIES}
CACHE PATH "Paths to FFmpeg libraries" FORCE)
unset(FFmpeg_BUILD_LIBRARIES)
unset(FFmpeg_LIBVA_LIBRARIES)
unset(FFmpeg_HWACCEL_FLAGS)
unset(FFmpeg_HWACCEL_INCLUDE_DIRS)
unset(FFmpeg_HWACCEL_LDFLAGS)
unset(FFmpeg_HWACCEL_LIBRARIES)
if (FFmpeg_FOUND)
message(STATUS "Found FFmpeg version ${FFmpeg_VERSION}")
@@ -670,12 +735,13 @@ if (YUZU_USE_BUNDLED_FFMPEG)
endif()
else() # WIN32
# Use yuzu FFmpeg binaries
set(FFmpeg_EXT_NAME "ffmpeg-4.3.1")
set(FFmpeg_EXT_NAME "ffmpeg-4.4")
set(FFmpeg_PATH "${CMAKE_BINARY_DIR}/externals/${FFmpeg_EXT_NAME}")
download_bundled_external("ffmpeg/" ${FFmpeg_EXT_NAME} "")
set(FFmpeg_FOUND YES)
set(FFmpeg_INCLUDE_DIR "${FFmpeg_PATH}/include" CACHE PATH "Path to FFmpeg headers" FORCE)
set(FFmpeg_LIBRARY_DIR "${FFmpeg_PATH}/bin" CACHE PATH "Path to FFmpeg library directory" FORCE)
set(FFmpeg_LDFLAGS "" CACHE STRING "FFmpeg linker flags" FORCE)
set(FFmpeg_DLL_DIR "${FFmpeg_PATH}/bin" CACHE PATH "Path to FFmpeg dll's" FORCE)
set(FFmpeg_LIBRARIES
${FFmpeg_LIBRARY_DIR}/swscale.lib

View File

@@ -53,6 +53,8 @@ add_library(common STATIC
div_ceil.h
dynamic_library.cpp
dynamic_library.h
error.cpp
error.h
fiber.cpp
fiber.h
fs/file.cpp
@@ -88,7 +90,6 @@ add_library(common STATIC
microprofile.cpp
microprofile.h
microprofileui.h
misc.cpp
nvidia_flags.cpp
nvidia_flags.h
page_table.cpp

View File

@@ -4,9 +4,8 @@
#pragma once
#include <algorithm>
#include <array>
#include <string>
#include <iterator>
#if !defined(ARCHITECTURE_x86_64)
#include <cstdlib> // for exit
@@ -49,16 +48,6 @@ __declspec(dllimport) void __stdcall DebugBreak(void);
#endif // _MSC_VER ndef
// Generic function to get last error message.
// Call directly after the command or use the error num.
// This function might change the error code.
// Defined in misc.cpp.
[[nodiscard]] std::string GetLastErrorMsg();
// Like GetLastErrorMsg(), but passing an explicit error code.
// Defined in misc.cpp.
[[nodiscard]] std::string NativeErrorToString(int e);
#define DECLARE_ENUM_FLAG_OPERATORS(type) \
[[nodiscard]] constexpr type operator|(type a, type b) noexcept { \
using T = std::underlying_type_t<type>; \
@@ -72,6 +61,14 @@ __declspec(dllimport) void __stdcall DebugBreak(void);
using T = std::underlying_type_t<type>; \
return static_cast<type>(static_cast<T>(a) ^ static_cast<T>(b)); \
} \
[[nodiscard]] constexpr type operator<<(type a, type b) noexcept { \
using T = std::underlying_type_t<type>; \
return static_cast<type>(static_cast<T>(a) << static_cast<T>(b)); \
} \
[[nodiscard]] constexpr type operator>>(type a, type b) noexcept { \
using T = std::underlying_type_t<type>; \
return static_cast<type>(static_cast<T>(a) >> static_cast<T>(b)); \
} \
constexpr type& operator|=(type& a, type b) noexcept { \
a = a | b; \
return a; \
@@ -84,6 +81,14 @@ __declspec(dllimport) void __stdcall DebugBreak(void);
a = a ^ b; \
return a; \
} \
constexpr type& operator<<=(type& a, type b) noexcept { \
a = a << b; \
return a; \
} \
constexpr type& operator>>=(type& a, type b) noexcept { \
a = a >> b; \
return a; \
} \
[[nodiscard]] constexpr type operator~(type key) noexcept { \
using T = std::underlying_type_t<type>; \
return static_cast<type>(~static_cast<T>(key)); \

View File

@@ -10,7 +10,9 @@
#include <cstring>
#endif
#include "common/common_funcs.h"
#include "common/error.h"
namespace Common {
std::string NativeErrorToString(int e) {
#ifdef _WIN32
@@ -50,3 +52,5 @@ std::string GetLastErrorMsg() {
return NativeErrorToString(errno);
#endif
}
} // namespace Common

src/common/error.h (new file, 21 lines)
View File

@@ -0,0 +1,21 @@
// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string>
namespace Common {
// Generic function to get last error message.
// Call directly after the command or use the error num.
// This function might change the error code.
// Defined in error.cpp.
[[nodiscard]] std::string GetLastErrorMsg();
// Like GetLastErrorMsg(), but passing an explicit error code.
// Defined in error.cpp.
[[nodiscard]] std::string NativeErrorToString(int e);
} // namespace Common

View File

@@ -54,7 +54,7 @@ void LogSettings() {
log_setting("Renderer_GPUAccuracyLevel", values.gpu_accuracy.GetValue());
log_setting("Renderer_UseAsynchronousGpuEmulation",
values.use_asynchronous_gpu_emulation.GetValue());
log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue());
log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue());
log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue());
log_setting("Renderer_UseVsync", values.use_vsync.GetValue());
log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue());
@@ -136,7 +136,7 @@ void RestoreGlobalState(bool is_powered_on) {
values.use_disk_shader_cache.SetGlobal(true);
values.gpu_accuracy.SetGlobal(true);
values.use_asynchronous_gpu_emulation.SetGlobal(true);
values.use_nvdec_emulation.SetGlobal(true);
values.nvdec_emulation.SetGlobal(true);
values.accelerate_astc.SetGlobal(true);
values.use_vsync.SetGlobal(true);
values.shader_backend.SetGlobal(true);

View File

@@ -48,6 +48,12 @@ enum class FullscreenMode : u32 {
Exclusive = 1,
};
enum class NvdecEmulation : u32 {
Off = 0,
CPU = 1,
GPU = 2,
};
/** The BasicSetting class is a simple resource manager. It defines a label and default value
* alongside the actual value of the setting for simpler and less-error prone use with frontend
* configurations. Setting a default value and label is required, though subclasses may deviate from
@@ -466,7 +472,7 @@ struct Values {
RangedSetting<GPUAccuracy> gpu_accuracy{GPUAccuracy::High, GPUAccuracy::Normal,
GPUAccuracy::Extreme, "gpu_accuracy"};
Setting<bool> use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"};
Setting<bool> use_nvdec_emulation{true, "use_nvdec_emulation"};
Setting<NvdecEmulation> nvdec_emulation{NvdecEmulation::GPU, "nvdec_emulation"};
Setting<bool> accelerate_astc{true, "accelerate_astc"};
Setting<bool> use_vsync{true, "use_vsync"};
BasicRangedSetting<u16> fps_cap{1000, 1, 1000, "fps_cap"};

View File

@@ -2,7 +2,9 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_funcs.h"
#include <string>
#include "common/error.h"
#include "common/logging/log.h"
#include "common/thread.h"
#ifdef __APPLE__
@@ -21,8 +23,6 @@
#include <unistd.h>
#endif
#include <string>
#ifdef __FreeBSD__
#define cpu_set_t cpuset_t
#endif

View File

@@ -263,6 +263,8 @@ add_library(core STATIC
hle/service/acc/acc_u0.h
hle/service/acc/acc_u1.cpp
hle/service/acc/acc_u1.h
hle/service/acc/async_context.cpp
hle/service/acc/async_context.h
hle/service/acc/errors.h
hle/service/acc/profile_manager.cpp
hle/service/acc/profile_manager.h

View File

@@ -21,34 +21,25 @@ namespace Core {
CpuManager::CpuManager(System& system_) : system{system_} {}
CpuManager::~CpuManager() = default;
void CpuManager::ThreadStart(CpuManager& cpu_manager, std::size_t core) {
cpu_manager.RunThread(core);
void CpuManager::ThreadStart(std::stop_token stop_token, CpuManager& cpu_manager,
std::size_t core) {
cpu_manager.RunThread(stop_token, core);
}
void CpuManager::Initialize() {
running_mode = true;
if (is_multicore) {
for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
core_data[core].host_thread =
std::make_unique<std::thread>(ThreadStart, std::ref(*this), core);
core_data[core].host_thread = std::jthread(ThreadStart, std::ref(*this), core);
}
} else {
core_data[0].host_thread = std::make_unique<std::thread>(ThreadStart, std::ref(*this), 0);
core_data[0].host_thread = std::jthread(ThreadStart, std::ref(*this), 0);
}
}
void CpuManager::Shutdown() {
running_mode = false;
Pause(false);
if (is_multicore) {
for (auto& data : core_data) {
data.host_thread->join();
data.host_thread.reset();
}
} else {
core_data[0].host_thread->join();
core_data[0].host_thread.reset();
}
}
std::function<void(void*)> CpuManager::GetGuestThreadStartFunc() {
@@ -317,7 +308,7 @@ void CpuManager::Pause(bool paused) {
}
}
void CpuManager::RunThread(std::size_t core) {
void CpuManager::RunThread(std::stop_token stop_token, std::size_t core) {
/// Initialization
system.RegisterCoreThread(core);
std::string name;
@@ -361,6 +352,10 @@ void CpuManager::RunThread(std::size_t core) {
return;
}
if (stop_token.stop_requested()) {
break;
}
auto current_thread = system.Kernel().CurrentScheduler()->GetCurrentThread();
data.is_running = true;
Common::Fiber::YieldTo(data.host_context, *current_thread->GetHostContext());

View File

@@ -78,9 +78,9 @@ private:
void SingleCoreRunSuspendThread();
void SingleCorePause(bool paused);
static void ThreadStart(CpuManager& cpu_manager, std::size_t core);
static void ThreadStart(std::stop_token stop_token, CpuManager& cpu_manager, std::size_t core);
void RunThread(std::size_t core);
void RunThread(std::stop_token stop_token, std::size_t core);
struct CoreData {
std::shared_ptr<Common::Fiber> host_context;
@@ -89,7 +89,7 @@ private:
std::atomic<bool> is_running;
std::atomic<bool> is_paused;
std::atomic<bool> initialized;
std::unique_ptr<std::thread> host_thread;
std::jthread host_thread;
};
std::atomic<bool> running_mode{};

View File

@@ -5,6 +5,7 @@
#pragma once
#include <array>
#include <string>
#include <vector>
#include "common/common_funcs.h"

View File

@@ -28,13 +28,20 @@ constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 12.1.0-1.0";
// Atmosphere version constants.
constexpr u8 ATMOSPHERE_RELEASE_VERSION_MAJOR = 0;
constexpr u8 ATMOSPHERE_RELEASE_VERSION_MINOR = 19;
constexpr u8 ATMOSPHERE_RELEASE_VERSION_MICRO = 5;
constexpr u8 ATMOSPHERE_RELEASE_VERSION_MAJOR = 1;
constexpr u8 ATMOSPHERE_RELEASE_VERSION_MINOR = 0;
constexpr u8 ATMOSPHERE_RELEASE_VERSION_MICRO = 0;
constexpr u32 AtmosphereTargetFirmwareWithRevision(u8 major, u8 minor, u8 micro, u8 rev) {
return u32{major} << 24 | u32{minor} << 16 | u32{micro} << 8 | u32{rev};
}
constexpr u32 AtmosphereTargetFirmware(u8 major, u8 minor, u8 micro) {
return AtmosphereTargetFirmwareWithRevision(major, minor, micro, 0);
}
constexpr u32 GetTargetFirmware() {
return u32{HOS_VERSION_MAJOR} << 24 | u32{HOS_VERSION_MINOR} << 16 |
u32{HOS_VERSION_MICRO} << 8 | 0U;
return AtmosphereTargetFirmware(HOS_VERSION_MAJOR, HOS_VERSION_MINOR, HOS_VERSION_MICRO);
}
} // namespace HLE::ApiVersion

View File

@@ -5,6 +5,7 @@
#pragma once
#include <array>
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>

View File

@@ -23,6 +23,7 @@
#include "core/hle/service/acc/acc_su.h"
#include "core/hle/service/acc/acc_u0.h"
#include "core/hle/service/acc/acc_u1.h"
#include "core/hle/service/acc/async_context.h"
#include "core/hle/service/acc/errors.h"
#include "core/hle/service/acc/profile_manager.h"
#include "core/hle/service/glue/arp.h"
@@ -454,22 +455,6 @@ public:
: IProfileCommon{system_, "IProfileEditor", true, user_id_, profile_manager_} {}
};
class IAsyncContext final : public ServiceFramework<IAsyncContext> {
public:
explicit IAsyncContext(Core::System& system_) : ServiceFramework{system_, "IAsyncContext"} {
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "GetSystemEvent"},
{1, nullptr, "Cancel"},
{2, nullptr, "HasDone"},
{3, nullptr, "GetResult"},
};
// clang-format on
RegisterHandlers(functions);
}
};
class ISessionObject final : public ServiceFramework<ISessionObject> {
public:
explicit ISessionObject(Core::System& system_, Common::UUID)
@@ -504,16 +489,44 @@ public:
}
};
class EnsureTokenIdCacheAsyncInterface final : public IAsyncContext {
public:
explicit EnsureTokenIdCacheAsyncInterface(Core::System& system_) : IAsyncContext{system_} {
MarkComplete();
}
~EnsureTokenIdCacheAsyncInterface() = default;
void LoadIdTokenCache(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_ACC, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultSuccess);
}
protected:
bool IsComplete() const override {
return true;
}
void Cancel() override {}
ResultCode GetResult() const override {
return ResultSuccess;
}
};
class IManagerForApplication final : public ServiceFramework<IManagerForApplication> {
public:
explicit IManagerForApplication(Core::System& system_, Common::UUID user_id_)
: ServiceFramework{system_, "IManagerForApplication"}, user_id{user_id_} {
: ServiceFramework{system_, "IManagerForApplication"},
ensure_token_id{std::make_shared<EnsureTokenIdCacheAsyncInterface>(system)},
user_id{user_id_} {
// clang-format off
static const FunctionInfo functions[] = {
{0, &IManagerForApplication::CheckAvailability, "CheckAvailability"},
{1, &IManagerForApplication::GetAccountId, "GetAccountId"},
{2, nullptr, "EnsureIdTokenCacheAsync"},
{3, nullptr, "LoadIdTokenCache"},
{2, &IManagerForApplication::EnsureIdTokenCacheAsync, "EnsureIdTokenCacheAsync"},
{3, &IManagerForApplication::LoadIdTokenCache, "LoadIdTokenCache"},
{130, &IManagerForApplication::GetNintendoAccountUserResourceCacheForApplication, "GetNintendoAccountUserResourceCacheForApplication"},
{150, nullptr, "CreateAuthorizationRequest"},
{160, &IManagerForApplication::StoreOpenContext, "StoreOpenContext"},
@@ -540,6 +553,20 @@ private:
rb.PushRaw<u64>(user_id.GetNintendoID());
}
void EnsureIdTokenCacheAsync(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_ACC, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(ResultSuccess);
rb.PushIpcInterface(ensure_token_id);
}
void LoadIdTokenCache(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_ACC, "(STUBBED) called");
ensure_token_id->LoadIdTokenCache(ctx);
}
void GetNintendoAccountUserResourceCacheForApplication(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_ACC, "(STUBBED) called");
@@ -562,6 +589,7 @@ private:
rb.Push(ResultSuccess);
}
std::shared_ptr<EnsureTokenIdCacheAsyncInterface> ensure_token_id{};
Common::UUID user_id{Common::INVALID_UUID};
};

View File

@@ -0,0 +1,68 @@
// Copyright 2021 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/service/acc/async_context.h"
namespace Service::Account {
IAsyncContext::IAsyncContext(Core::System& system_)
: ServiceFramework{system_, "IAsyncContext"}, compeletion_event{system_.Kernel()} {
Kernel::KAutoObject::Create(std::addressof(compeletion_event));
compeletion_event.Initialize("IAsyncContext:CompletionEvent");
// clang-format off
static const FunctionInfo functions[] = {
{0, &IAsyncContext::GetSystemEvent, "GetSystemEvent"},
{1, &IAsyncContext::Cancel, "Cancel"},
{2, &IAsyncContext::HasDone, "HasDone"},
{3, &IAsyncContext::GetResult, "GetResult"},
};
// clang-format on
RegisterHandlers(functions);
}
void IAsyncContext::GetSystemEvent(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_ACC, "called");
IPC::ResponseBuilder rb{ctx, 2, 1};
rb.Push(ResultSuccess);
rb.PushCopyObjects(compeletion_event.GetReadableEvent());
}
void IAsyncContext::Cancel(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_ACC, "called");
Cancel();
MarkComplete();
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultSuccess);
}
void IAsyncContext::HasDone(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_ACC, "called");
is_complete.store(IsComplete());
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
rb.Push(is_complete.load());
}
void IAsyncContext::GetResult(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_ACC, "called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(GetResult());
}
void IAsyncContext::MarkComplete() {
is_complete.store(true);
compeletion_event.GetWritableEvent().Signal();
}
} // namespace Service::Account

View File

@@ -0,0 +1,37 @@
// Copyright 2021 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <atomic>
#include "core/hle/kernel/k_event.h"
#include "core/hle/service/service.h"
namespace Core {
class System;
}
namespace Service::Account {
class IAsyncContext : public ServiceFramework<IAsyncContext> {
public:
explicit IAsyncContext(Core::System& system_);
void GetSystemEvent(Kernel::HLERequestContext& ctx);
void Cancel(Kernel::HLERequestContext& ctx);
void HasDone(Kernel::HLERequestContext& ctx);
void GetResult(Kernel::HLERequestContext& ctx);
protected:
virtual bool IsComplete() const = 0;
virtual void Cancel() = 0;
virtual ResultCode GetResult() const = 0;
void MarkComplete();
std::atomic<bool> is_complete{false};
Kernel::KEvent compeletion_event;
};
} // namespace Service::Account

View File

@@ -1270,7 +1270,8 @@ void ILibraryAppletCreator::CreateHandleStorage(Kernel::HLERequestContext& ctx)
IApplicationFunctions::IApplicationFunctions(Core::System& system_)
: ServiceFramework{system_, "IApplicationFunctions"}, gpu_error_detected_event{system.Kernel()},
friend_invitation_storage_channel_event{system.Kernel()},
health_warning_disappeared_system_event{system.Kernel()} {
notification_storage_channel_event{system.Kernel()}, health_warning_disappeared_system_event{
system.Kernel()} {
// clang-format off
static const FunctionInfo functions[] = {
{1, &IApplicationFunctions::PopLaunchParameter, "PopLaunchParameter"},
@@ -1322,7 +1323,7 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)
{131, nullptr, "SetDelayTimeToAbortOnGpuError"},
{140, &IApplicationFunctions::GetFriendInvitationStorageChannelEvent, "GetFriendInvitationStorageChannelEvent"},
{141, &IApplicationFunctions::TryPopFromFriendInvitationStorageChannel, "TryPopFromFriendInvitationStorageChannel"},
{150, nullptr, "GetNotificationStorageChannelEvent"},
{150, &IApplicationFunctions::GetNotificationStorageChannelEvent, "GetNotificationStorageChannelEvent"},
{151, nullptr, "TryPopFromNotificationStorageChannel"},
{160, &IApplicationFunctions::GetHealthWarningDisappearedSystemEvent, "GetHealthWarningDisappearedSystemEvent"},
{170, nullptr, "SetHdcpAuthenticationActivated"},
@@ -1340,11 +1341,14 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)
Kernel::KAutoObject::Create(std::addressof(gpu_error_detected_event));
Kernel::KAutoObject::Create(std::addressof(friend_invitation_storage_channel_event));
Kernel::KAutoObject::Create(std::addressof(notification_storage_channel_event));
Kernel::KAutoObject::Create(std::addressof(health_warning_disappeared_system_event));
gpu_error_detected_event.Initialize("IApplicationFunctions:GpuErrorDetectedSystemEvent");
friend_invitation_storage_channel_event.Initialize(
"IApplicationFunctions:FriendInvitationStorageChannelEvent");
notification_storage_channel_event.Initialize(
"IApplicationFunctions:NotificationStorageChannelEvent");
health_warning_disappeared_system_event.Initialize(
"IApplicationFunctions:HealthWarningDisappearedSystemEvent");
}
@@ -1762,6 +1766,14 @@ void IApplicationFunctions::TryPopFromFriendInvitationStorageChannel(
rb.Push(ERR_NO_DATA_IN_CHANNEL);
}
void IApplicationFunctions::GetNotificationStorageChannelEvent(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_AM, "called");
IPC::ResponseBuilder rb{ctx, 2, 1};
rb.Push(ResultSuccess);
rb.PushCopyObjects(notification_storage_channel_event.GetReadableEvent());
}
void IApplicationFunctions::GetHealthWarningDisappearedSystemEvent(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_AM, "called");

View File

@@ -295,6 +295,7 @@ private:
void GetGpuErrorDetectedSystemEvent(Kernel::HLERequestContext& ctx);
void GetFriendInvitationStorageChannelEvent(Kernel::HLERequestContext& ctx);
void TryPopFromFriendInvitationStorageChannel(Kernel::HLERequestContext& ctx);
void GetNotificationStorageChannelEvent(Kernel::HLERequestContext& ctx);
void GetHealthWarningDisappearedSystemEvent(Kernel::HLERequestContext& ctx);
bool launch_popped_application_specific = false;
@@ -302,6 +303,7 @@ private:
s32 previous_program_index{-1};
Kernel::KEvent gpu_error_detected_event;
Kernel::KEvent friend_invitation_storage_channel_event;
Kernel::KEvent notification_storage_channel_event;
Kernel::KEvent health_warning_disappeared_system_event;
};

View File

@@ -97,14 +97,24 @@ ResultCode VfsDirectoryServiceWrapper::DeleteFile(const std::string& path_) cons
ResultCode VfsDirectoryServiceWrapper::CreateDirectory(const std::string& path_) const {
std::string path(Common::FS::SanitizePath(path_));
auto dir = GetDirectoryRelativeWrapped(backing, Common::FS::GetParentPath(path));
if (dir == nullptr || Common::FS::GetFilename(Common::FS::GetParentPath(path)).empty()) {
dir = backing;
}
auto new_dir = dir->CreateSubdirectory(Common::FS::GetFilename(path));
if (new_dir == nullptr) {
// TODO(DarkLordZach): Find a better error code for this
return ResultUnknown;
// NOTE: This is inaccurate behavior. CreateDirectory is not recursive.
// CreateDirectory should return PathNotFound if the parent directory does not exist.
// This is here temporarily in order to have UMM "work" in the meantime.
// TODO (Morph): Remove this when a hardware test verifies the correct behavior.
const auto components = Common::FS::SplitPathComponents(path);
std::string relative_path;
for (const auto& component : components) {
// Skip empty path components
if (component.empty()) {
continue;
}
relative_path = Common::FS::SanitizePath(relative_path + '/' + component);
auto new_dir = backing->CreateSubdirectory(relative_path);
if (new_dir == nullptr) {
// TODO(DarkLordZach): Find a better error code for this
return ResultUnknown;
}
}
return ResultSuccess;
}

View File

@@ -15,6 +15,20 @@
namespace Service::HID {
class Controller_Touchscreen final : public ControllerBase {
public:
enum class TouchScreenModeForNx : u8 {
UseSystemSetting,
Finger,
Heat2,
};
struct TouchScreenConfigurationForNx {
TouchScreenModeForNx mode;
INSERT_PADDING_BYTES_NOINIT(0x7);
INSERT_PADDING_BYTES_NOINIT(0xF); // Reserved
};
static_assert(sizeof(TouchScreenConfigurationForNx) == 0x17,
"TouchScreenConfigurationForNx is an invalid size");
explicit Controller_Touchscreen(Core::System& system_);
~Controller_Touchscreen() override;

View File

@@ -331,7 +331,7 @@ Hid::Hid(Core::System& system_)
{529, nullptr, "SetDisallowedPalmaConnection"},
{1000, &Hid::SetNpadCommunicationMode, "SetNpadCommunicationMode"},
{1001, &Hid::GetNpadCommunicationMode, "GetNpadCommunicationMode"},
{1002, nullptr, "SetTouchScreenConfiguration"},
{1002, &Hid::SetTouchScreenConfiguration, "SetTouchScreenConfiguration"},
{1003, nullptr, "IsFirmwareUpdateNeededForNotification"},
{2000, nullptr, "ActivateDigitizer"},
};
@@ -1631,6 +1631,18 @@ void Hid::GetNpadCommunicationMode(Kernel::HLERequestContext& ctx) {
.GetNpadCommunicationMode());
}
void Hid::SetTouchScreenConfiguration(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto touchscreen_mode{rp.PopRaw<Controller_Touchscreen::TouchScreenConfigurationForNx>()};
const auto applet_resource_user_id{rp.Pop<u64>()};
LOG_WARNING(Service_HID, "(STUBBED) called, touchscreen_mode={}, applet_resource_user_id={}",
touchscreen_mode.mode, applet_resource_user_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultSuccess);
}
class HidDbg final : public ServiceFramework<HidDbg> {
public:
explicit HidDbg(Core::System& system_) : ServiceFramework{system_, "hid:dbg"} {

View File

@@ -159,6 +159,7 @@ private:
void SetPalmaBoostMode(Kernel::HLERequestContext& ctx);
void SetNpadCommunicationMode(Kernel::HLERequestContext& ctx);
void GetNpadCommunicationMode(Kernel::HLERequestContext& ctx);
void SetTouchScreenConfiguration(Kernel::HLERequestContext& ctx);
enum class VibrationDeviceType : u32 {
Unknown = 0,

View File

@@ -42,15 +42,14 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {}
void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
const Common::Rectangle<int>& crop_rect) {
VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
const VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
LOG_TRACE(Service,
"Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
addr, offset, width, height, stride, format);
using PixelFormat = Tegra::FramebufferConfig::PixelFormat;
const Tegra::FramebufferConfig framebuffer{
addr, offset, width, height, stride, static_cast<PixelFormat>(format),
transform, crop_rect};
const auto pixel_format = static_cast<Tegra::FramebufferConfig::PixelFormat>(format);
const Tegra::FramebufferConfig framebuffer{addr, offset, width, height,
stride, pixel_format, transform, crop_rect};
system.GetPerfStats().EndSystemFrame();
system.GPU().SwapBuffers(&framebuffer);

View File

@@ -42,7 +42,9 @@ struct IGBPBuffer {
u32_le index;
INSERT_PADDING_WORDS(3);
u32_le gpu_buffer_id;
INSERT_PADDING_WORDS(17);
INSERT_PADDING_WORDS(6);
u32_le external_format;
INSERT_PADDING_WORDS(10);
u32_le nvmap_handle;
u32_le offset;
INSERT_PADDING_WORDS(60);

View File

@@ -298,7 +298,7 @@ void NVFlinger::Compose() {
auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>("/dev/nvdisp_disp0");
ASSERT(nvdisp);
nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.format,
nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.external_format,
igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
buffer->get().transform, buffer->get().crop_rect);

View File

@@ -7,7 +7,8 @@
#include <limits>
#include <utility>
#include <vector>
#include "common/common_funcs.h"
#include "common/error.h"
#ifdef _WIN32
#include <winsock2.h>
@@ -223,7 +224,7 @@ Errno GetAndLogLastError() {
if (err == Errno::AGAIN) {
return err;
}
LOG_ERROR(Network, "Socket operation error: {}", NativeErrorToString(e));
LOG_ERROR(Network, "Socket operation error: {}", Common::NativeErrorToString(e));
return err;
}

View File

@@ -72,6 +72,18 @@ static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy backend) {
return "Unknown";
}
static const char* TranslateNvdecEmulation(Settings::NvdecEmulation backend) {
switch (backend) {
case Settings::NvdecEmulation::Off:
return "Off";
case Settings::NvdecEmulation::CPU:
return "CPU";
case Settings::NvdecEmulation::GPU:
return "GPU";
}
return "Unknown";
}
u64 GetTelemetryId() {
u64 telemetry_id{};
const auto filename = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ConfigDir) / "telemetry_id";
@@ -229,8 +241,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader,
TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy.GetValue()));
AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
Settings::values.use_asynchronous_gpu_emulation.GetValue());
AddField(field_type, "Renderer_UseNvdecEmulation",
Settings::values.use_nvdec_emulation.GetValue());
AddField(field_type, "Renderer_NvdecEmulation",
TranslateNvdecEmulation(Settings::values.nvdec_emulation.GetValue()));
AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue());
AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue());
AddField(field_type, "Renderer_ShaderBackend",

View File

@@ -11,6 +11,8 @@
namespace Shader::Backend::GLSL {
namespace {
constexpr char THREAD_ID[]{"gl_SubGroupInvocationARB"};
void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) {
IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
if (!in_bounds) {
@@ -43,84 +45,100 @@ void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op,
ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width);
SetInBoundsFlag(ctx, inst);
}
std::string_view BallotIndex(EmitContext& ctx) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return ".x";
}
return "[gl_SubGroupInvocationARB>>5]";
}
std::string GetMask(EmitContext& ctx, std::string_view mask) {
const auto ballot_index{BallotIndex(ctx)};
return fmt::format("uint(uvec2({}){})", mask, ballot_index);
}
} // Anonymous namespace
void EmitLaneId(EmitContext& ctx, IR::Inst& inst) {
ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst);
ctx.AddU32("{}={}&31u;", inst, THREAD_ID);
}
void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
} else {
const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
return;
}
const auto ballot_index{BallotIndex(ctx)};
const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
}
void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
ctx.AddU1("{}=anyInvocationARB({});", inst, pred);
} else {
const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
return;
}
const auto ballot_index{BallotIndex(ctx)};
const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
}
void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
} else {
const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
const auto value{fmt::format("({}^{})", ballot, active_mask)};
ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
return;
}
const auto ballot_index{BallotIndex(ctx)};
const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
const auto value{fmt::format("({}^{})", ballot, active_mask)};
ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
}
void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred);
} else {
ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred);
}
const auto ballot_index{BallotIndex(ctx)};
ctx.AddU32("{}=uvec2(ballotARB({})){};", inst, pred, ballot_index);
}
void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst);
ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupEqMaskARB"));
}
void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) {
ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst);
ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLtMaskARB"));
}
void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) {
ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst);
ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLeMaskARB"));
}
void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) {
ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst);
ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGtMaskARB"));
}
void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst);
ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGeMaskARB"));
}
void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
std::string_view index, std::string_view clamp,
std::string_view segmentation_mask) {
std::string_view index, std::string_view clamp, std::string_view seg_mask) {
if (ctx.profile.support_gl_warp_intrinsics) {
UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask);
UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, seg_mask);
return;
}
const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)};
const auto thread_id{"gl_SubGroupInvocationARB"};
const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)};
const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)};
const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
const auto upper_index{fmt::format("{}?{}+32:{}", is_upper_partition, index, index)};
const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
const auto lhs{fmt::format("({}&{})", index, not_seg_mask)};
const auto not_seg_mask{fmt::format("(~{})", seg_mask)};
const auto min_thread_id{ComputeMinThreadId(THREAD_ID, seg_mask)};
const auto max_thread_id{
ComputeMaxThreadId(min_thread_id, big_warp ? upper_clamp : clamp, not_seg_mask)};
const auto lhs{fmt::format("({}&{})", big_warp ? upper_index : index, not_seg_mask)};
const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)};
ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
SetInBoundsFlag(ctx, inst);
@@ -128,29 +146,34 @@ void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
}
void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index,
std::string_view clamp, std::string_view segmentation_mask) {
std::string_view clamp, std::string_view seg_mask) {
if (ctx.profile.support_gl_warp_intrinsics) {
UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask);
UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, seg_mask);
return;
}
const auto thread_id{"gl_SubGroupInvocationARB"};
const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
const auto src_thread_id{fmt::format("({}-{})", thread_id, index)};
const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)};
const auto src_thread_id{fmt::format("({}-{})", THREAD_ID, index)};
ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id);
SetInBoundsFlag(ctx, inst);
ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
}
void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
std::string_view index, std::string_view clamp,
std::string_view segmentation_mask) {
std::string_view index, std::string_view clamp, std::string_view seg_mask) {
if (ctx.profile.support_gl_warp_intrinsics) {
UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask);
UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, seg_mask);
return;
}
const auto thread_id{"gl_SubGroupInvocationARB"};
const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
const auto src_thread_id{fmt::format("({}+{})", thread_id, index)};
const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)};
const auto src_thread_id{fmt::format("({}+{})", THREAD_ID, index)};
ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
SetInBoundsFlag(ctx, inst);
ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
@@ -158,14 +181,17 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value,
std::string_view index, std::string_view clamp,
std::string_view segmentation_mask) {
std::string_view seg_mask) {
if (ctx.profile.support_gl_warp_intrinsics) {
UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask);
UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, seg_mask);
return;
}
const auto thread_id{"gl_SubGroupInvocationARB"};
const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
const auto src_thread_id{fmt::format("({}^{})", thread_id, index)};
const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)};
const auto src_thread_id{fmt::format("({}^{})", THREAD_ID, index)};
ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
SetInBoundsFlag(ctx, inst);
ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);

View File

@@ -15,6 +15,8 @@
namespace Shader::Backend::SPIRV {
namespace {
constexpr size_t NUM_FIXEDFNCTEXTURE = 10;
enum class Operation {
Increment,
Decrement,
@@ -427,6 +429,16 @@ Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) {
return pointer_type;
}
}
size_t FindNextUnusedLocation(const std::bitset<IR::NUM_GENERICS>& used_locations,
size_t start_offset) {
for (size_t location = start_offset; location < used_locations.size(); ++location) {
if (!used_locations.test(location)) {
return location;
}
}
throw RuntimeError("Unable to get an unused location for legacy attribute");
}
} // Anonymous namespace
void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
@@ -1227,6 +1239,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
loads[IR::Attribute::TessellationEvaluationPointV]) {
tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord);
}
std::bitset<IR::NUM_GENERICS> used_locations{};
for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
const AttributeType input_type{runtime_info.generic_input_types[index]};
if (!runtime_info.previous_stage_stores.Generic(index)) {
@@ -1238,6 +1251,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
if (input_type == AttributeType::Disabled) {
continue;
}
used_locations.set(index);
const Id type{GetAttributeType(*this, input_type)};
const Id id{DefineInput(*this, type, true)};
Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
@@ -1263,6 +1277,26 @@ void EmitContext::DefineInputs(const IR::Program& program) {
break;
}
}
size_t previous_unused_location = 0;
if (loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location);
previous_unused_location = location;
used_locations.set(location);
const Id id{DefineInput(*this, F32[4], true)};
Decorate(id, spv::Decoration::Location, location);
input_front_color = id;
}
for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) {
if (loads.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) {
const size_t location =
FindNextUnusedLocation(used_locations, previous_unused_location);
previous_unused_location = location;
used_locations.set(location);
const Id id{DefineInput(*this, F32[4], true)};
Decorate(id, spv::Decoration::Location, location);
input_fixed_fnc_textures[index] = id;
}
}
if (stage == Stage::TessellationEval) {
for (size_t index = 0; index < info.uses_patches.size(); ++index) {
if (!info.uses_patches[index]) {
@@ -1313,9 +1347,31 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt,
spv::BuiltIn::ViewportMaskNV);
}
std::bitset<IR::NUM_GENERICS> used_locations{};
for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
if (info.stores.Generic(index)) {
DefineGenericOutput(*this, index, invocations);
used_locations.set(index);
}
}
size_t previous_unused_location = 0;
if (info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location);
previous_unused_location = location;
used_locations.set(location);
const Id id{DefineOutput(*this, F32[4], invocations)};
Decorate(id, spv::Decoration::Location, static_cast<u32>(location));
output_front_color = id;
}
for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) {
if (info.stores.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) {
const size_t location =
FindNextUnusedLocation(used_locations, previous_unused_location);
previous_unused_location = location;
used_locations.set(location);
const Id id{DefineOutput(*this, F32[4], invocations)};
Decorate(id, spv::Decoration::Location, location);
output_fixed_fnc_textures[index] = id;
}
}
switch (stage) {

View File

@@ -268,10 +268,14 @@ public:
Id write_global_func_u32x4{};
Id input_position{};
Id input_front_color{};
std::array<Id, 10> input_fixed_fnc_textures{};
std::array<Id, 32> input_generics{};
Id output_point_size{};
Id output_position{};
Id output_front_color{};
std::array<Id, 10> output_fixed_fnc_textures{};
std::array<std::array<GenericElementInfo, 4>, 32> output_generics{};
Id output_tess_level_outer{};

View File

@@ -43,6 +43,25 @@ Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&...
}
}
bool IsFixedFncTexture(IR::Attribute attribute) {
return attribute >= IR::Attribute::FixedFncTexture0S &&
attribute <= IR::Attribute::FixedFncTexture9Q;
}
u32 FixedFncTextureAttributeIndex(IR::Attribute attribute) {
if (!IsFixedFncTexture(attribute)) {
throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute);
}
return (static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4u;
}
u32 FixedFncTextureAttributeElement(IR::Attribute attribute) {
if (!IsFixedFncTexture(attribute)) {
throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute);
}
return static_cast<u32>(attribute) % 4u;
}
template <typename... Args>
Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) {
if (ctx.stage == Stage::TessellationControl) {
@@ -74,6 +93,13 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id);
}
}
if (IsFixedFncTexture(attr)) {
const u32 index{FixedFncTextureAttributeIndex(attr)};
const u32 element{FixedFncTextureAttributeElement(attr)};
const Id element_id{ctx.Const(element)};
return OutputAccessChain(ctx, ctx.output_f32, ctx.output_fixed_fnc_textures[index],
element_id);
}
switch (attr) {
case IR::Attribute::PointSize:
return ctx.output_point_size;
@@ -85,6 +111,14 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
const Id element_id{ctx.Const(element)};
return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id);
}
case IR::Attribute::ColorFrontDiffuseR:
case IR::Attribute::ColorFrontDiffuseG:
case IR::Attribute::ColorFrontDiffuseB:
case IR::Attribute::ColorFrontDiffuseA: {
const u32 element{static_cast<u32>(attr) % 4};
const Id element_id{ctx.Const(element)};
return OutputAccessChain(ctx, ctx.output_f32, ctx.output_front_color, element_id);
}
case IR::Attribute::ClipDistance0:
case IR::Attribute::ClipDistance1:
case IR::Attribute::ClipDistance2:
@@ -307,6 +341,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
const Id value{ctx.OpLoad(type->id, pointer)};
return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value;
}
if (IsFixedFncTexture(attr)) {
const u32 index{FixedFncTextureAttributeIndex(attr)};
const Id attr_id{ctx.input_fixed_fnc_textures[index]};
const Id attr_ptr{AttrPointer(ctx, ctx.input_f32, vertex, attr_id, ctx.Const(element))};
return ctx.OpLoad(ctx.F32[1], attr_ptr);
}
switch (attr) {
case IR::Attribute::PrimitiveId:
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id));
@@ -316,6 +356,13 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
case IR::Attribute::PositionW:
return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position,
ctx.Const(element)));
case IR::Attribute::ColorFrontDiffuseR:
case IR::Attribute::ColorFrontDiffuseG:
case IR::Attribute::ColorFrontDiffuseB:
case IR::Attribute::ColorFrontDiffuseA: {
return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_front_color,
ctx.Const(element)));
}
case IR::Attribute::InstanceId:
if (ctx.profile.support_vertex_instance_id) {
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));
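The index/element arithmetic introduced above maps each FixedFncTexture attribute onto a vec4 slot: four consecutive enum values (S, T, R, Q) share one location. A self-contained sketch of that mapping, using an illustrative stand-in enum rather than yuzu's IR::Attribute (the base value 64 is made up):
#include <cstdint>
#include <cstdio>
// Illustrative stand-in for IR::Attribute: ten fixed-function texture sets,
// four components (S, T, R, Q) each.
enum class Attribute : std::uint32_t {
    FixedFncTexture0S = 64,
    FixedFncTexture9Q = FixedFncTexture0S + 10 * 4 - 1,
};
constexpr std::uint32_t TextureIndex(Attribute attr) {
    return (static_cast<std::uint32_t>(attr) -
            static_cast<std::uint32_t>(Attribute::FixedFncTexture0S)) / 4u;
}
constexpr std::uint32_t TextureElement(Attribute attr) {
    // The diff computes the element as attribute % 4, which gives the same
    // result whenever the base enum value is a multiple of 4.
    return (static_cast<std::uint32_t>(attr) -
            static_cast<std::uint32_t>(Attribute::FixedFncTexture0S)) % 4u;
}
int main() {
    // base + 2 * 4 + 2 is texture set 2, element 2 (the R component).
    const auto attr = static_cast<Attribute>(
        static_cast<std::uint32_t>(Attribute::FixedFncTexture0S) + 10);
    std::printf("set %u, element %u\n", TextureIndex(attr), TextureElement(attr));
}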

View File

@@ -7,8 +7,13 @@
namespace Shader::Backend::SPIRV {
namespace {
Id GetThreadId(EmitContext& ctx) {
return ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id);
}
Id WarpExtract(EmitContext& ctx, Id value) {
const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
const Id thread_id{GetThreadId(ctx)};
const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))};
return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
}
@@ -48,10 +53,17 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
return ctx.OpSelect(ctx.U32[1], in_range,
ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value);
}
Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) {
const Id thirty_two{ctx.Const(32u)};
const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)};
const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
}
} // Anonymous namespace
Id EmitLaneId(EmitContext& ctx) {
const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
const Id id{GetThreadId(ctx)};
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return id;
}
@@ -123,7 +135,15 @@ Id EmitSubgroupGeMask(EmitContext& ctx) {
Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
const Id thread_id{GetThreadId(ctx)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
const Id thirty_two{ctx.Const(32u)};
const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)};
const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)};
const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index);
clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
}
const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
@@ -137,7 +157,10 @@ Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id cla
Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
const Id thread_id{GetThreadId(ctx)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
clamp = GetUpperClamp(ctx, thread_id, clamp);
}
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
@@ -148,7 +171,10 @@ Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
const Id thread_id{GetThreadId(ctx)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
clamp = GetUpperClamp(ctx, thread_id, clamp);
}
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
@@ -159,7 +185,10 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam
Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask) {
const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
const Id thread_id{GetThreadId(ctx)};
if (ctx.profile.warp_size_potentially_larger_than_guest) {
clamp = GetUpperClamp(ctx, thread_id, clamp);
}
const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
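The guest ISA assumes 32-thread warps, so on hosts whose subgroups may be 64 wide the shuffle emitters above rebase the source index and clamp into the upper 32-thread partition when the invocation lives there. A plain C++ sketch of the integer logic behind those OpSelects (illustrative, no SPIR-V involved):
#include <cstdint>
#include <cstdio>
// Mirror of the clamp/index rebasing emitted above, evaluated on the CPU.
// thread_id is the host subgroup invocation id (0..63 on a 64-wide subgroup).
struct ShuffleParams {
    std::uint32_t index; // guest source lane requested by the shuffle
    std::uint32_t clamp; // guest clamp (31 for a full 32-thread warp)
};
ShuffleParams AdjustForUpperPartition(std::uint32_t thread_id, ShuffleParams params) {
    const bool is_upper_partition = thread_id >= 32;
    if (is_upper_partition) {
        // Guest lane ids restart at 0 in the upper half of the host subgroup,
        // so both the requested source lane and the clamp move up by 32.
        params.index += 32;
        params.clamp += 32;
    }
    return params;
}
int main() {
    // Host lane 40 is guest lane 8 of the second 32-thread warp: a guest
    // shuffle to lane 3 with clamp 31 must read host lane 35, clamped at 63.
    const ShuffleParams adjusted = AdjustForUpperPartition(40, {3, 31});
    std::printf("host source lane %u, clamp %u\n", adjusted.index, adjusted.clamp);
}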

View File

@@ -231,6 +231,7 @@ endif()
target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR})
target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES})
target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS})
add_dependencies(video_core host_shaders)
target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})

View File

@@ -5,6 +5,7 @@
#include <fstream>
#include <vector>
#include "common/assert.h"
#include "common/settings.h"
#include "video_core/command_classes/codecs/codec.h"
#include "video_core/command_classes/codecs/h264.h"
#include "video_core/command_classes/codecs/vp9.h"
@@ -16,44 +17,28 @@ extern "C" {
}
namespace Tegra {
#if defined(LIBVA_FOUND)
// Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c originally under MIT license
namespace {
constexpr std::array<const char*, 2> VAAPI_DRIVERS = {
"i915",
"amdgpu",
};
constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12;
constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P;
AVPixelFormat GetHwFormat(AVCodecContext*, const AVPixelFormat* pix_fmts) {
void AVPacketDeleter(AVPacket* ptr) {
av_packet_free(&ptr);
}
using AVPacketPtr = std::unique_ptr<AVPacket, decltype(&AVPacketDeleter)>;
AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) {
for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
if (*p == AV_PIX_FMT_VAAPI) {
return AV_PIX_FMT_VAAPI;
if (*p == av_codec_ctx->pix_fmt) {
return av_codec_ctx->pix_fmt;
}
}
LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU");
return *pix_fmts;
}
bool CreateVaapiHwdevice(AVBufferRef** av_hw_device) {
AVDictionary* hwdevice_options = nullptr;
av_dict_set(&hwdevice_options, "connection_type", "drm", 0);
for (const auto& driver : VAAPI_DRIVERS) {
av_dict_set(&hwdevice_options, "kernel_driver", driver, 0);
const int hwdevice_error = av_hwdevice_ctx_create(av_hw_device, AV_HWDEVICE_TYPE_VAAPI,
nullptr, hwdevice_options, 0);
if (hwdevice_error >= 0) {
LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver);
av_dict_free(&hwdevice_options);
return true;
}
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error);
}
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers");
av_dict_free(&hwdevice_options);
return false;
av_buffer_unref(&av_codec_ctx->hw_device_ctx);
av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT;
return PREFERRED_CPU_FMT;
}
} // namespace
#endif
void AVFrameDeleter(AVFrame* ptr) {
av_frame_free(&ptr);
@@ -68,56 +53,110 @@ Codec::~Codec() {
return;
}
// Free libav memory
avcodec_send_packet(av_codec_ctx, nullptr);
AVFrame* av_frame = av_frame_alloc();
avcodec_receive_frame(av_codec_ctx, av_frame);
avcodec_flush_buffers(av_codec_ctx);
av_frame_free(&av_frame);
avcodec_close(av_codec_ctx);
av_buffer_unref(&av_hw_device);
avcodec_free_context(&av_codec_ctx);
av_buffer_unref(&av_gpu_decoder);
}
void Codec::InitializeHwdec() {
// Prioritize integrated GPU to mitigate bandwidth bottlenecks
bool Codec::CreateGpuAvDevice() {
#if defined(LIBVA_FOUND)
if (CreateVaapiHwdevice(&av_hw_device)) {
const auto hw_device_ctx = av_buffer_ref(av_hw_device);
ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
av_codec_ctx->hw_device_ctx = hw_device_ctx;
av_codec_ctx->get_format = GetHwFormat;
static constexpr std::array<const char*, 3> VAAPI_DRIVERS = {
"i915",
"iHD",
"amdgpu",
};
AVDictionary* hwdevice_options = nullptr;
av_dict_set(&hwdevice_options, "connection_type", "drm", 0);
for (const auto& driver : VAAPI_DRIVERS) {
av_dict_set(&hwdevice_options, "kernel_driver", driver, 0);
const int hwdevice_error = av_hwdevice_ctx_create(&av_gpu_decoder, AV_HWDEVICE_TYPE_VAAPI,
nullptr, hwdevice_options, 0);
if (hwdevice_error >= 0) {
LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver);
av_dict_free(&hwdevice_options);
av_codec_ctx->pix_fmt = AV_PIX_FMT_VAAPI;
return true;
}
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error);
}
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers");
av_dict_free(&hwdevice_options);
#endif
static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX;
static constexpr std::array GPU_DECODER_TYPES{
AV_HWDEVICE_TYPE_CUDA,
#ifdef _WIN32
AV_HWDEVICE_TYPE_D3D11VA,
#else
AV_HWDEVICE_TYPE_VDPAU,
#endif
};
for (const auto& type : GPU_DECODER_TYPES) {
const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0);
if (hwdevice_res < 0) {
LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}",
av_hwdevice_get_type_name(type), hwdevice_res);
continue;
}
for (int i = 0;; i++) {
const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i);
if (!config) {
LOG_DEBUG(Service_NVDRV, "{} decoder does not support device type {}.",
av_codec->name, av_hwdevice_get_type_name(type));
break;
}
if (config->methods & HW_CONFIG_METHOD && config->device_type == type) {
av_codec_ctx->pix_fmt = config->pix_fmt;
LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
return true;
}
}
}
return false;
}
void Codec::InitializeAvCodecContext() {
av_codec_ctx = avcodec_alloc_context3(av_codec);
av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
}
void Codec::InitializeGpuDecoder() {
if (!CreateGpuAvDevice()) {
av_buffer_unref(&av_gpu_decoder);
return;
}
#endif
// TODO more GPU accelerated decoders
auto* hw_device_ctx = av_buffer_ref(av_gpu_decoder);
ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
av_codec_ctx->hw_device_ctx = hw_device_ctx;
av_codec_ctx->get_format = GetGpuFormat;
}
void Codec::Initialize() {
AVCodecID codec;
switch (current_codec) {
case NvdecCommon::VideoCodec::H264:
codec = AV_CODEC_ID_H264;
break;
case NvdecCommon::VideoCodec::Vp9:
codec = AV_CODEC_ID_VP9;
break;
default:
UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
const AVCodecID codec = [&] {
switch (current_codec) {
case NvdecCommon::VideoCodec::H264:
return AV_CODEC_ID_H264;
case NvdecCommon::VideoCodec::Vp9:
return AV_CODEC_ID_VP9;
default:
UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
return AV_CODEC_ID_NONE;
}
}();
av_codec = avcodec_find_decoder(codec);
InitializeAvCodecContext();
if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::GPU) {
InitializeGpuDecoder();
}
if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) {
LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res);
avcodec_free_context(&av_codec_ctx);
av_buffer_unref(&av_gpu_decoder);
return;
}
av_codec = avcodec_find_decoder(codec);
av_codec_ctx = avcodec_alloc_context3(av_codec);
av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
InitializeHwdec();
if (!av_codec_ctx->hw_device_ctx) {
LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding");
}
const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
if (av_error < 0) {
LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
avcodec_close(av_codec_ctx);
av_buffer_unref(&av_hw_device);
return;
}
initialized = true;
}
@@ -133,6 +172,9 @@ void Codec::Decode() {
if (is_first_frame) {
Initialize();
}
if (!initialized) {
return;
}
bool vp9_hidden_frame = false;
std::vector<u8> frame_data;
if (current_codec == NvdecCommon::VideoCodec::H264) {
@@ -141,50 +183,48 @@ void Codec::Decode() {
frame_data = vp9_decoder->ComposeFrameHeader(state);
vp9_hidden_frame = vp9_decoder->WasFrameHidden();
}
AVPacket packet{};
av_init_packet(&packet);
packet.data = frame_data.data();
packet.size = static_cast<s32>(frame_data.size());
if (const int ret = avcodec_send_packet(av_codec_ctx, &packet); ret) {
LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", ret);
AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter};
if (!packet) {
LOG_ERROR(Service_NVDRV, "av_packet_alloc failed");
return;
}
packet->data = frame_data.data();
packet->size = static_cast<s32>(frame_data.size());
if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) {
LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res);
return;
}
// Only receive/store visible frames
if (vp9_hidden_frame) {
return;
}
AVFrame* hw_frame = av_frame_alloc();
AVFrame* sw_frame = hw_frame;
ASSERT_MSG(hw_frame, "av_frame_alloc hw_frame failed");
if (const int ret = avcodec_receive_frame(av_codec_ctx, hw_frame); ret) {
AVFramePtr initial_frame{av_frame_alloc(), AVFrameDeleter};
AVFramePtr final_frame{nullptr, AVFrameDeleter};
ASSERT_MSG(initial_frame, "av_frame_alloc initial_frame failed");
if (const int ret = avcodec_receive_frame(av_codec_ctx, initial_frame.get()); ret) {
LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret);
av_frame_free(&hw_frame);
return;
}
if (!hw_frame->width || !hw_frame->height) {
if (initial_frame->width == 0 || initial_frame->height == 0) {
LOG_WARNING(Service_NVDRV, "Zero width or height in frame");
av_frame_free(&hw_frame);
return;
}
#if defined(LIBVA_FOUND)
// Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c under MIT license
if (hw_frame->format == AV_PIX_FMT_VAAPI) {
sw_frame = av_frame_alloc();
ASSERT_MSG(sw_frame, "av_frame_alloc sw_frame failed");
if (av_codec_ctx->hw_device_ctx) {
final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed");
// Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp
// because Intel drivers crash unless using AV_PIX_FMT_NV12
sw_frame->format = AV_PIX_FMT_NV12;
const int transfer_data_ret = av_hwframe_transfer_data(sw_frame, hw_frame, 0);
ASSERT_MSG(!transfer_data_ret, "av_hwframe_transfer_data error {}", transfer_data_ret);
av_frame_free(&hw_frame);
final_frame->format = PREFERRED_GPU_FMT;
const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0);
ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret);
} else {
final_frame = std::move(initial_frame);
}
#endif
if (sw_frame->format != AV_PIX_FMT_YUV420P && sw_frame->format != AV_PIX_FMT_NV12) {
UNIMPLEMENTED_MSG("Unexpected video format from host graphics: {}", sw_frame->format);
av_frame_free(&sw_frame);
if (final_frame->format != PREFERRED_CPU_FMT && final_frame->format != PREFERRED_GPU_FMT) {
UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format);
return;
}
av_frames.push(AVFramePtr{sw_frame, AVFrameDeleter});
av_frames.push(std::move(final_frame));
if (av_frames.size() > 10) {
LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame");
av_frames.pop();
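For context, a compressed sketch of the decoder-selection flow this file now uses, written against the public FFmpeg API (assumes FFmpeg development headers; the device types tried here are illustrative and error handling is trimmed): try each hardware device type, keep it only if the codec advertises a matching AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX config, otherwise fall back to software decoding.
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/hwcontext.h>
}
#include <array>
#include <cstdio>
// Returns a hardware device context for the first usable device type, or
// nullptr to signal that the caller should stay on software decoding.
AVBufferRef* CreateHwDevice(const AVCodec* codec, AVPixelFormat* out_pix_fmt) {
    static constexpr std::array device_types{AV_HWDEVICE_TYPE_CUDA, AV_HWDEVICE_TYPE_VAAPI};
    for (const AVHWDeviceType type : device_types) {
        AVBufferRef* device = nullptr;
        if (av_hwdevice_ctx_create(&device, type, nullptr, nullptr, 0) < 0) {
            continue; // this device type is not available on the host
        }
        for (int i = 0;; ++i) {
            const AVCodecHWConfig* config = avcodec_get_hw_config(codec, i);
            if (config == nullptr) {
                break; // the codec cannot use this device type
            }
            if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) != 0 &&
                config->device_type == type) {
                *out_pix_fmt = config->pix_fmt;
                return device;
            }
        }
        av_buffer_unref(&device);
    }
    return nullptr;
}
int main() {
    const AVCodec* codec = avcodec_find_decoder(AV_CODEC_ID_H264);
    AVPixelFormat hw_pix_fmt = AV_PIX_FMT_NONE;
    AVBufferRef* device = codec ? CreateHwDevice(codec, &hw_pix_fmt) : nullptr;
    std::printf("%s\n", device ? "hardware decoding available" : "falling back to software");
    av_buffer_unref(&device);
}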

View File

@@ -5,6 +5,7 @@
#pragma once
#include <memory>
#include <string_view>
#include <queue>
#include "common/common_types.h"
#include "video_core/command_classes/nvdec_common.h"
@@ -50,18 +51,23 @@ public:
/// Returns the value of current_codec
[[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
/// Return name of the current codec
[[nodiscard]] std::string_view GetCurrentCodecName() const;
private:
void InitializeHwdec();
void InitializeAvCodecContext();
void InitializeGpuDecoder();
bool CreateGpuAvDevice();
bool initialized{};
NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None};
AVCodec* av_codec{nullptr};
AVBufferRef* av_hw_device{nullptr};
AVCodecContext* av_codec_ctx{nullptr};
AVBufferRef* av_gpu_decoder{nullptr};
GPU& gpu;
const NvdecCommon::NvdecRegisters& state;

View File

@@ -95,7 +95,8 @@ const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegister
const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units /
(context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
writer.WriteUe(16);
// TODO (ameerj): Where do we get this number? It seems to be particular to each stream
writer.WriteUe(6); // Max number of reference frames
writer.WriteBit(false);
writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
writer.WriteUe(pic_height - 1);
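WriteUe above emits unsigned Exp-Golomb (ue(v)) codes, the variable-length integers used throughout H.264 parameter sets: a value v is written as leading zeros followed by the binary form of v + 1. A self-contained sketch of the encoding (illustrative, not yuzu's H264BitWriter):
#include <cstdint>
#include <cstdio>
#include <vector>
// Minimal MSB-first bit writer plus unsigned Exp-Golomb (ue(v)) encoding.
class BitWriter {
public:
    void WriteBit(bool bit) {
        bits.push_back(bit);
    }
    void WriteUe(std::uint32_t value) {
        // ue(v): N-1 leading zeros, then the N-bit binary representation of value + 1.
        const std::uint64_t coded = static_cast<std::uint64_t>(value) + 1;
        int num_bits = 0;
        while ((coded >> num_bits) != 0) {
            ++num_bits;
        }
        for (int i = 0; i < num_bits - 1; ++i) {
            WriteBit(false);
        }
        for (int i = num_bits - 1; i >= 0; --i) {
            WriteBit(((coded >> i) & 1) != 0);
        }
    }
    void Print() const {
        for (const bool bit : bits) {
            std::putchar(bit ? '1' : '0');
        }
        std::putchar('\n');
    }
private:
    std::vector<bool> bits;
};
int main() {
    BitWriter writer;
    writer.WriteUe(6); // max_num_ref_frames from the header above -> 00111
    writer.Print();
}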

View File

@@ -475,10 +475,10 @@ public:
// These values are used by Nouveau and some games.
AddGL = 0x8006,
SubtractGL = 0x8007,
ReverseSubtractGL = 0x8008,
MinGL = 0x800a,
MaxGL = 0x800b
MinGL = 0x8007,
MaxGL = 0x8008,
SubtractGL = 0x800a,
ReverseSubtractGL = 0x800b
};
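The reordered values above line up with the canonical GL blend-equation tokens that Nouveau and some titles write into these registers. A small static_assert sketch against those tokens (restated here rather than pulled from a GL header) makes the correction easy to verify:
#include <cstdint>
// Blend equation tokens as defined by OpenGL (EXT_blend_minmax / EXT_blend_subtract).
constexpr std::uint32_t GL_FUNC_ADD_TOKEN = 0x8006;
constexpr std::uint32_t GL_MIN_TOKEN = 0x8007;
constexpr std::uint32_t GL_MAX_TOKEN = 0x8008;
constexpr std::uint32_t GL_FUNC_SUBTRACT_TOKEN = 0x800A;
constexpr std::uint32_t GL_FUNC_REVERSE_SUBTRACT_TOKEN = 0x800B;
enum class Equation : std::uint32_t {
    AddGL = 0x8006,
    MinGL = 0x8007,
    MaxGL = 0x8008,
    SubtractGL = 0x800a,
    ReverseSubtractGL = 0x800b,
};
static_assert(static_cast<std::uint32_t>(Equation::AddGL) == GL_FUNC_ADD_TOKEN);
static_assert(static_cast<std::uint32_t>(Equation::MinGL) == GL_MIN_TOKEN);
static_assert(static_cast<std::uint32_t>(Equation::MaxGL) == GL_MAX_TOKEN);
static_assert(static_cast<std::uint32_t>(Equation::SubtractGL) == GL_FUNC_SUBTRACT_TOKEN);
static_assert(static_cast<std::uint32_t>(Equation::ReverseSubtractGL) == GL_FUNC_REVERSE_SUBTRACT_TOKEN);
int main() {}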
enum class Factor : u32 {

View File

@@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"

View File

@@ -164,7 +164,8 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
blit_screen.Recreate();
}
const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated);
scheduler.Flush(render_semaphore);
const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore();
scheduler.Flush(render_semaphore, present_semaphore);
scheduler.WaitWorker();
swapchain.Present(render_semaphore);

View File

@@ -358,7 +358,7 @@ void VKBlitScreen::CreateDescriptorPool() {
void VKBlitScreen::CreateRenderPass() {
const VkAttachmentDescription color_attachment{
.flags = 0,
.format = swapchain.GetImageFormat(),
.format = swapchain.GetImageViewFormat(),
.samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,

View File

@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <mutex>
#include <span>
#include <vector>
@@ -18,7 +19,6 @@ namespace Vulkan {
// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines
constexpr size_t SETS_GROW_RATE = 16;
constexpr s32 SCORE_THRESHOLD = 3;
constexpr u32 SETS_PER_POOL = 64;
struct DescriptorBank {
DescriptorBankInfo info;
@@ -58,11 +58,12 @@ static DescriptorBankInfo MakeBankInfo(std::span<const Shader::Info> infos) {
static void AllocatePool(const Device& device, DescriptorBank& bank) {
std::array<VkDescriptorPoolSize, 6> pool_sizes;
size_t pool_cursor{};
const u32 sets_per_pool = device.GetSetsPerPool();
const auto add = [&](VkDescriptorType type, u32 count) {
if (count > 0) {
pool_sizes[pool_cursor++] = {
.type = type,
.descriptorCount = count * SETS_PER_POOL,
.descriptorCount = count * sets_per_pool,
};
}
};
@@ -77,7 +78,7 @@ static void AllocatePool(const Device& device, DescriptorBank& bank) {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = nullptr,
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
.maxSets = SETS_PER_POOL,
.maxSets = sets_per_pool,
.poolSizeCount = static_cast<u32>(pool_cursor),
.pPoolSizes = std::data(pool_sizes),
}));
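A compact sketch of the pool-sizing rule above, assuming the Vulkan SDK headers are on the include path and using made-up per-set counts: each descriptor type gets its per-set count multiplied by the device-tuned sets_per_pool, and maxSets uses the same tunable, so AMD's larger value simply yields bigger, fewer pools. Only the create-info assembly is shown; no device or pool is created.
#include <vulkan/vulkan.h>
#include <array>
#include <cstdint>
#include <cstdio>
int main() {
    // Device-tuned value: 64 by default, raised to 96 on AMD's proprietary driver.
    const std::uint32_t sets_per_pool = 96;
    // Per-set requirements for an imaginary descriptor bank.
    const std::uint32_t uniform_buffers_per_set = 2;
    const std::uint32_t sampled_images_per_set = 8;
    const std::array pool_sizes{
        VkDescriptorPoolSize{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, uniform_buffers_per_set * sets_per_pool},
        VkDescriptorPoolSize{VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, sampled_images_per_set * sets_per_pool},
    };
    const VkDescriptorPoolCreateInfo ci{
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
        .pNext = nullptr,
        .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
        .maxSets = sets_per_pool,
        .poolSizeCount = static_cast<std::uint32_t>(pool_sizes.size()),
        .pPoolSizes = pool_sizes.data(),
    };
    // vkCreateDescriptorPool(device, &ci, nullptr, &pool) would follow with a real VkDevice.
    std::printf("maxSets=%u, sampled images per pool=%u\n", ci.maxSets, pool_sizes[1].descriptorCount);
}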

View File

@@ -228,9 +228,7 @@ void RasterizerVulkan::Clear() {
};
const u32 color_attachment = regs.clear_buffers.RT;
const auto attachment_aspect_mask = framebuffer->ImageRanges()[color_attachment].aspectMask;
const bool is_color_rt = (attachment_aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
if (use_color && is_color_rt) {
if (use_color && framebuffer->HasAspectColorBit(color_attachment)) {
VkClearValue clear_value;
std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color));
@@ -248,12 +246,15 @@ void RasterizerVulkan::Clear() {
return;
}
VkImageAspectFlags aspect_flags = 0;
if (use_depth) {
if (use_depth && framebuffer->HasAspectDepthBit()) {
aspect_flags |= VK_IMAGE_ASPECT_DEPTH_BIT;
}
if (use_stencil) {
if (use_stencil && framebuffer->HasAspectStencilBit()) {
aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT;
}
if (aspect_flags == 0) {
return;
}
scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
VkClearAttachment attachment;
@@ -764,12 +765,7 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
const Maxwell::StencilOp zpass = regs.stencil_front_op_zpass;
const Maxwell::ComparisonOp compare = regs.stencil_front_func_func;
if (regs.stencil_two_side_enable) {
scheduler.Record([fail, zfail, zpass, compare](vk::CommandBuffer cmdbuf) {
cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, MaxwellToVK::StencilOp(fail),
MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail),
MaxwellToVK::ComparisonOp(compare));
});
} else {
// Separate stencil op per face
const Maxwell::StencilOp back_fail = regs.stencil_back_op_fail;
const Maxwell::StencilOp back_zfail = regs.stencil_back_op_zfail;
const Maxwell::StencilOp back_zpass = regs.stencil_back_op_zpass;
@@ -784,6 +780,13 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
MaxwellToVK::StencilOp(back_zfail),
MaxwellToVK::ComparisonOp(back_compare));
});
} else {
// Front face defines the stencil op of both faces
scheduler.Record([fail, zfail, zpass, compare](vk::CommandBuffer cmdbuf) {
cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, MaxwellToVK::StencilOp(fail),
MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail),
MaxwellToVK::ComparisonOp(compare));
});
}
}

View File

@@ -55,14 +55,14 @@ VKScheduler::~VKScheduler() {
worker_thread.join();
}
void VKScheduler::Flush(VkSemaphore semaphore) {
SubmitExecution(semaphore);
void VKScheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
SubmitExecution(signal_semaphore, wait_semaphore);
AllocateNewContext();
}
void VKScheduler::Finish(VkSemaphore semaphore) {
void VKScheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
const u64 presubmit_tick = CurrentTick();
SubmitExecution(semaphore);
SubmitExecution(signal_semaphore, wait_semaphore);
WaitWorker();
Wait(presubmit_tick);
AllocateNewContext();
@@ -171,37 +171,41 @@ void VKScheduler::AllocateWorkerCommandBuffer() {
});
}
void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
void VKScheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
EndPendingOperations();
InvalidateState();
const u64 signal_value = master_semaphore->NextTick();
Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
cmdbuf.End();
const u32 num_signal_semaphores = semaphore ? 2U : 1U;
const u64 wait_value = signal_value - 1;
const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
const VkSemaphore timeline_semaphore = master_semaphore->Handle();
const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U;
const std::array signal_values{signal_value, u64(0)};
const std::array signal_semaphores{timeline_semaphore, semaphore};
const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U;
const std::array wait_values{signal_value - 1, u64(1)};
const std::array wait_semaphores{timeline_semaphore, wait_semaphore};
static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
};
const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
.pNext = nullptr,
.waitSemaphoreValueCount = 1,
.pWaitSemaphoreValues = &wait_value,
.waitSemaphoreValueCount = num_wait_semaphores,
.pWaitSemaphoreValues = wait_values.data(),
.signalSemaphoreValueCount = num_signal_semaphores,
.pSignalSemaphoreValues = signal_values.data(),
};
const VkSubmitInfo submit_info{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = &timeline_si,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &timeline_semaphore,
.pWaitDstStageMask = &wait_stage_mask,
.waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = wait_semaphores.data(),
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1,
.pCommandBuffers = cmdbuf.address(),
.signalSemaphoreCount = num_signal_semaphores,

View File

@@ -34,10 +34,10 @@ public:
~VKScheduler();
/// Sends the current execution context to the GPU.
void Flush(VkSemaphore semaphore = nullptr);
void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
/// Sends the current execution context to the GPU and waits for it to complete.
void Finish(VkSemaphore semaphore = nullptr);
void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
/// Waits for the worker thread to finish executing everything. After this function returns it's
/// safe to touch worker resources.
@@ -191,7 +191,7 @@ private:
void AllocateWorkerCommandBuffer();
void SubmitExecution(VkSemaphore semaphore);
void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore);
void AllocateNewContext();

View File

@@ -110,10 +110,6 @@ public:
return Exchange(Dirty::DepthTestEnable, false);
}
bool TouchDepthBoundsEnable() {
return Exchange(Dirty::DepthBoundsEnable, false);
}
bool TouchDepthWriteEnable() {
return Exchange(Dirty::DepthWriteEnable, false);
}

View File

@@ -20,16 +20,15 @@ namespace Vulkan {
namespace {
VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats, bool srgb) {
VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats) {
if (formats.size() == 1 && formats[0].format == VK_FORMAT_UNDEFINED) {
VkSurfaceFormatKHR format;
format.format = VK_FORMAT_B8G8R8A8_UNORM;
format.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
return format;
}
const auto& found = std::find_if(formats.begin(), formats.end(), [srgb](const auto& format) {
const auto request_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
return format.format == request_format &&
const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) {
return format.format == VK_FORMAT_B8G8R8A8_UNORM &&
format.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
});
return found != formats.end() ? *found : formats[0];
@@ -107,14 +106,12 @@ void VKSwapchain::AcquireNextImage() {
}
void VKSwapchain::Present(VkSemaphore render_semaphore) {
const VkSemaphore present_semaphore{*present_semaphores[frame_index]};
const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore};
const auto present_queue{device.GetPresentQueue()};
const VkPresentInfoKHR present_info{
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.pNext = nullptr,
.waitSemaphoreCount = render_semaphore ? 2U : 1U,
.pWaitSemaphores = semaphores.data(),
.waitSemaphoreCount = render_semaphore ? 1U : 0U,
.pWaitSemaphores = &render_semaphore,
.swapchainCount = 1,
.pSwapchains = swapchain.address(),
.pImageIndices = &image_index,
@@ -145,7 +142,7 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
const auto formats{physical_device.GetSurfaceFormatsKHR(surface)};
const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)};
const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)};
const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
const VkPresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};
u32 requested_image_count{capabilities.minImageCount + 1};
@@ -180,6 +177,17 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
swapchain_ci.pQueueFamilyIndices = queue_indices.data();
}
static constexpr std::array view_formats{VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB};
VkImageFormatListCreateInfo format_list{
.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR,
.pNext = nullptr,
.viewFormatCount = static_cast<u32>(view_formats.size()),
.pViewFormats = view_formats.data(),
};
if (device.IsKhrSwapchainMutableFormatEnabled()) {
format_list.pNext = std::exchange(swapchain_ci.pNext, &format_list);
swapchain_ci.flags |= VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR;
}
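The format_list.pNext = std::exchange(swapchain_ci.pNext, &format_list); line is a handy idiom for splicing a struct into an existing Vulkan pNext chain without losing whatever was already chained. A library-free sketch of the same pattern with a mock two-node chain:
#include <cassert>
#include <utility>
// Mock of a Vulkan-style extensible struct: every node starts with a pNext pointer.
struct Node {
    const void* pNext = nullptr;
    int payload = 0;
};
int main() {
    Node base{.pNext = nullptr, .payload = 1};
    Node existing_extension{.pNext = nullptr, .payload = 2};
    base.pNext = &existing_extension;
    // Splice a new extension in front of whatever base already points at:
    // the new node inherits the old chain, and base now points at the new node.
    Node format_list{.pNext = nullptr, .payload = 3};
    format_list.pNext = std::exchange(base.pNext, &format_list);
    assert(base.pNext == &format_list);
    assert(format_list.pNext == &existing_extension);
}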
// Request the size again to reduce the possibility of a TOCTOU race condition.
const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface);
swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height);
@@ -191,7 +199,7 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
images = swapchain.GetImages();
image_count = static_cast<u32>(images.size());
image_format = surface_format.format;
image_view_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
}
void VKSwapchain::CreateSemaphores() {
@@ -207,7 +215,7 @@ void VKSwapchain::CreateImageViews() {
.flags = 0,
.image = {},
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = image_format,
.format = image_view_format,
.components =
{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,

View File

@@ -68,8 +68,12 @@ public:
return *image_views[index];
}
VkFormat GetImageFormat() const {
return image_format;
VkFormat GetImageViewFormat() const {
return image_view_format;
}
VkSemaphore CurrentPresentSemaphore() const {
return *present_semaphores[frame_index];
}
private:
@@ -96,7 +100,7 @@ private:
u32 image_index{};
u32 frame_index{};
VkFormat image_format{};
VkFormat image_view_format{};
VkExtent2D extent{};
bool current_srgb{};

View File

@@ -1186,9 +1186,12 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
renderpass_key.depth_format = depth_buffer->format;
num_layers = std::max(num_layers, depth_buffer->range.extent.layers);
images[num_images] = depth_buffer->ImageHandle();
image_ranges[num_images] = MakeSubresourceRange(depth_buffer);
const VkImageSubresourceRange subresource_range = MakeSubresourceRange(depth_buffer);
image_ranges[num_images] = subresource_range;
samples = depth_buffer->Samples();
++num_images;
has_depth = (subresource_range.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0;
has_stencil = (subresource_range.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0;
} else {
renderpass_key.depth_format = PixelFormat::Invalid;
}

View File

@@ -232,6 +232,18 @@ public:
return image_ranges;
}
[[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept {
return (image_ranges.at(index).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
}
[[nodiscard]] bool HasAspectDepthBit() const noexcept {
return has_depth;
}
[[nodiscard]] bool HasAspectStencilBit() const noexcept {
return has_stencil;
}
private:
vk::Framebuffer framebuffer;
VkRenderPass renderpass{};
@@ -241,6 +253,8 @@ private:
u32 num_images = 0;
std::array<VkImage, 9> images{};
std::array<VkImageSubresourceRange, 9> image_ranges{};
bool has_depth{};
bool has_stencil{};
};
struct TextureCacheParams {

View File

@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <filesystem>
#include <fstream>
#include <memory>

View File

@@ -4,6 +4,7 @@
#pragma once
#include <algorithm>
#include <array>
#include <bit>
#include <concepts>

View File

@@ -37,7 +37,8 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
namespace VideoCore {
std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue();
const auto nvdec_value = Settings::values.nvdec_emulation.GetValue();
const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off;
const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec);
auto context = emu_window.CreateSharedContext();

View File

@@ -16,6 +16,7 @@ VkBool32 Callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
switch (static_cast<u32>(data->messageIdNumber)) {
case 0x682a878au: // VUID-vkCmdBindVertexBuffers2EXT-pBuffers-parameter
case 0x99fb7dfdu: // UNASSIGNED-RequiredParameter (vkCmdBindVertexBuffers2EXT pBuffers[0])
case 0xe8616bf2u: // Bound VkDescriptorSet 0x0[] was destroyed. Likely push_descriptor related
return VK_FALSE;
default:
break;

View File

@@ -243,6 +243,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
SetupFamilies(surface);
SetupFeatures();
SetupProperties();
CollectTelemetryParameters();
const auto queue_cis = GetDeviceQueueCreateInfos();
const std::vector extensions = LoadExtensions(surface != nullptr);
@@ -368,6 +369,18 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
};
SetNext(next, demote);
if (driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE) {
const u32 version = properties.driverVersion;
// Broken in this driver
if (version > VK_MAKE_API_VERSION(0, 2, 0, 193)) {
LOG_WARNING(Render_Vulkan, "AMD proprietary driver versions newer than 21.9.1 "
"(Windows) / 0.2.0.194 (amdvlk) have "
"broken VkPhysicalDeviceFloat16Int8FeaturesKHR");
is_int8_supported = false;
is_float16_supported = false;
}
}
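The gate above compares the packed driverVersion against VK_MAKE_API_VERSION(0, 2, 0, 193), i.e. amdvlk 2.0.193, relying on the patch number occupying the low bits so a plain integer comparison orders releases correctly. A tiny sketch of packing and unpacking with the standard macros (assumes Vulkan headers recent enough to define VK_MAKE_API_VERSION and VK_API_VERSION_*; whether AMD's Windows driver uses this exact packing is an assumption of the check, not verified here):
#include <vulkan/vulkan.h>
#include <cstdio>
int main() {
    // Packed the same way the check above builds its threshold.
    const uint32_t threshold = VK_MAKE_API_VERSION(0, 2, 0, 193);
    // Pretend the driver reported amdvlk 2.0.194.
    const uint32_t reported = VK_MAKE_API_VERSION(0, 2, 0, 194);
    std::printf("reported %u.%u.%u, blacklist int8/float16: %s\n",
                VK_API_VERSION_MAJOR(reported), VK_API_VERSION_MINOR(reported),
                VK_API_VERSION_PATCH(reported), reported > threshold ? "yes" : "no");
}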
if (is_int8_supported || is_float16_supported) {
VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR,
@@ -560,7 +573,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld);
CollectPhysicalMemoryInfo();
CollectTelemetryParameters();
CollectToolingInfo();
if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) {
@@ -587,6 +599,26 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
ext_extended_dynamic_state = false;
}
}
sets_per_pool = 64;
if (driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE) {
// AMD drivers need a higher amount of Sets per Pool in certain circumstances like in XC2.
sets_per_pool = 96;
}
const bool is_amd = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY ||
driver_id == VK_DRIVER_ID_MESA_RADV ||
driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE;
if (ext_sampler_filter_minmax && is_amd) {
// Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken.
if (!is_float16_supported) {
LOG_WARNING(
Render_Vulkan,
"Blacklisting AMD GCN4 and lower for VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME");
ext_sampler_filter_minmax = false;
}
}
if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state");
ext_vertex_input_dynamic_state = false;
@@ -839,6 +871,8 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
bool has_khr_shader_float16_int8{};
bool has_khr_workgroup_memory_explicit_layout{};
bool has_khr_pipeline_executable_properties{};
bool has_khr_image_format_list{};
bool has_khr_swapchain_mutable_format{};
bool has_ext_subgroup_size_control{};
bool has_ext_transform_feedback{};
bool has_ext_custom_border_color{};
@@ -888,6 +922,9 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false);
test(has_khr_workgroup_memory_explicit_layout,
VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false);
test(has_khr_image_format_list, VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, false);
test(has_khr_swapchain_mutable_format, VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME,
false);
test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false);
if (Settings::values.enable_nsight_aftermath) {
test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
@@ -1066,6 +1103,11 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
khr_pipeline_executable_properties = true;
}
}
if (has_khr_image_format_list && has_khr_swapchain_mutable_format) {
extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME);
extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME);
khr_swapchain_mutable_format = true;
}
if (khr_push_descriptor) {
VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor;
push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR;

View File

@@ -224,6 +224,11 @@ public:
return khr_pipeline_executable_properties;
}
/// Returns true if VK_KHR_swapchain_mutable_format is enabled.
bool IsKhrSwapchainMutableFormatEnabled() const {
return khr_swapchain_mutable_format;
}
/// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const {
return khr_workgroup_memory_explicit_layout;
@@ -318,6 +323,10 @@ public:
return device_access_memory;
}
u32 GetSetsPerPool() const {
return sets_per_pool;
}
private:
/// Checks if the physical device is suitable.
void CheckSuitability(bool requires_swapchain) const;
@@ -371,6 +380,7 @@ private:
VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
u32 max_push_descriptors{}; ///< Maximum number of push descriptors
u32 sets_per_pool{}; ///< Sets per Descriptor Pool
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
bool is_float16_supported{}; ///< Support for float16 arithmetic.
bool is_int8_supported{}; ///< Support for int8 arithmetic.
@@ -390,6 +400,7 @@ private:
bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts.
bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descriptor.
bool khr_pipeline_executable_properties{}; ///< Support for executable properties.
bool khr_swapchain_mutable_format{}; ///< Support for VK_KHR_swapchain_mutable_format.
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.

View File

@@ -812,7 +812,7 @@ void Config::ReadRendererValues() {
ReadGlobalSetting(Settings::values.use_disk_shader_cache);
ReadGlobalSetting(Settings::values.gpu_accuracy);
ReadGlobalSetting(Settings::values.use_asynchronous_gpu_emulation);
ReadGlobalSetting(Settings::values.use_nvdec_emulation);
ReadGlobalSetting(Settings::values.nvdec_emulation);
ReadGlobalSetting(Settings::values.accelerate_astc);
ReadGlobalSetting(Settings::values.use_vsync);
ReadGlobalSetting(Settings::values.shader_backend);
@@ -1349,7 +1349,10 @@ void Config::SaveRendererValues() {
static_cast<u32>(Settings::values.gpu_accuracy.GetDefault()),
Settings::values.gpu_accuracy.UsingGlobal());
WriteGlobalSetting(Settings::values.use_asynchronous_gpu_emulation);
WriteGlobalSetting(Settings::values.use_nvdec_emulation);
WriteSetting(QString::fromStdString(Settings::values.nvdec_emulation.GetLabel()),
static_cast<u32>(Settings::values.nvdec_emulation.GetValue(global)),
static_cast<u32>(Settings::values.nvdec_emulation.GetDefault()),
Settings::values.nvdec_emulation.UsingGlobal());
WriteGlobalSetting(Settings::values.accelerate_astc);
WriteGlobalSetting(Settings::values.use_vsync);
WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()),

View File

@@ -182,5 +182,6 @@ private:
Q_DECLARE_METATYPE(Settings::CPUAccuracy);
Q_DECLARE_METATYPE(Settings::GPUAccuracy);
Q_DECLARE_METATYPE(Settings::FullscreenMode);
Q_DECLARE_METATYPE(Settings::NvdecEmulation);
Q_DECLARE_METATYPE(Settings::RendererBackend);
Q_DECLARE_METATYPE(Settings::ShaderBackend);

View File

@@ -88,24 +88,30 @@ void ConfigureGraphics::SetConfiguration() {
ui->api_widget->setEnabled(runtime_lock);
ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock);
ui->use_disk_shader_cache->setEnabled(runtime_lock);
ui->use_nvdec_emulation->setEnabled(runtime_lock);
ui->nvdec_emulation_widget->setEnabled(runtime_lock);
ui->accelerate_astc->setEnabled(runtime_lock);
ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue());
ui->use_asynchronous_gpu_emulation->setChecked(
Settings::values.use_asynchronous_gpu_emulation.GetValue());
ui->use_nvdec_emulation->setChecked(Settings::values.use_nvdec_emulation.GetValue());
ui->accelerate_astc->setChecked(Settings::values.accelerate_astc.GetValue());
if (Settings::IsConfiguringGlobal()) {
ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend.GetValue()));
ui->fullscreen_mode_combobox->setCurrentIndex(
static_cast<int>(Settings::values.fullscreen_mode.GetValue()));
ui->nvdec_emulation->setCurrentIndex(
static_cast<int>(Settings::values.nvdec_emulation.GetValue()));
ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue());
} else {
ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend);
ConfigurationShared::SetHighlight(ui->api_widget,
!Settings::values.renderer_backend.UsingGlobal());
ConfigurationShared::SetPerGameSetting(ui->nvdec_emulation,
&Settings::values.nvdec_emulation);
ConfigurationShared::SetHighlight(ui->nvdec_emulation_widget,
!Settings::values.nvdec_emulation.UsingGlobal());
ConfigurationShared::SetPerGameSetting(ui->fullscreen_mode_combobox,
&Settings::values.fullscreen_mode);
ConfigurationShared::SetHighlight(ui->fullscreen_mode_label,
@@ -137,8 +143,6 @@ void ConfigureGraphics::ApplyConfiguration() {
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_gpu_emulation,
ui->use_asynchronous_gpu_emulation,
use_asynchronous_gpu_emulation);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_nvdec_emulation,
ui->use_nvdec_emulation, use_nvdec_emulation);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.accelerate_astc, ui->accelerate_astc,
accelerate_astc);
@@ -147,6 +151,9 @@ void ConfigureGraphics::ApplyConfiguration() {
if (Settings::values.renderer_backend.UsingGlobal()) {
Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend());
}
if (Settings::values.nvdec_emulation.UsingGlobal()) {
Settings::values.nvdec_emulation.SetValue(GetCurrentNvdecEmulation());
}
if (Settings::values.shader_backend.UsingGlobal()) {
Settings::values.shader_backend.SetValue(shader_backend);
}
@@ -180,6 +187,13 @@ void ConfigureGraphics::ApplyConfiguration() {
}
}
if (ui->nvdec_emulation->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
Settings::values.nvdec_emulation.SetGlobal(true);
} else {
Settings::values.nvdec_emulation.SetGlobal(false);
Settings::values.nvdec_emulation.SetValue(GetCurrentNvdecEmulation());
}
if (ui->bg_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
Settings::values.bg_red.SetGlobal(true);
Settings::values.bg_green.SetGlobal(true);
@@ -278,6 +292,20 @@ Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const {
ConfigurationShared::USE_GLOBAL_OFFSET);
}
Settings::NvdecEmulation ConfigureGraphics::GetCurrentNvdecEmulation() const {
if (Settings::IsConfiguringGlobal()) {
return static_cast<Settings::NvdecEmulation>(ui->nvdec_emulation->currentIndex());
}
if (ui->nvdec_emulation->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
Settings::values.nvdec_emulation.SetGlobal(true);
return Settings::values.nvdec_emulation.GetValue();
}
Settings::values.nvdec_emulation.SetGlobal(false);
return static_cast<Settings::NvdecEmulation>(ui->nvdec_emulation->currentIndex() -
ConfigurationShared::USE_GLOBAL_OFFSET);
}
void ConfigureGraphics::SetupPerGameUI() {
if (Settings::IsConfiguringGlobal()) {
ui->api->setEnabled(Settings::values.renderer_backend.UsingGlobal());
@@ -286,7 +314,7 @@ void ConfigureGraphics::SetupPerGameUI() {
ui->aspect_ratio_combobox->setEnabled(Settings::values.aspect_ratio.UsingGlobal());
ui->use_asynchronous_gpu_emulation->setEnabled(
Settings::values.use_asynchronous_gpu_emulation.UsingGlobal());
ui->use_nvdec_emulation->setEnabled(Settings::values.use_nvdec_emulation.UsingGlobal());
ui->nvdec_emulation->setEnabled(Settings::values.nvdec_emulation.UsingGlobal());
ui->accelerate_astc->setEnabled(Settings::values.accelerate_astc.UsingGlobal());
ui->use_disk_shader_cache->setEnabled(Settings::values.use_disk_shader_cache.UsingGlobal());
ui->bg_button->setEnabled(Settings::values.bg_red.UsingGlobal());
@@ -301,8 +329,6 @@ void ConfigureGraphics::SetupPerGameUI() {
ConfigurationShared::SetColoredTristate(
ui->use_disk_shader_cache, Settings::values.use_disk_shader_cache, use_disk_shader_cache);
ConfigurationShared::SetColoredTristate(
ui->use_nvdec_emulation, Settings::values.use_nvdec_emulation, use_nvdec_emulation);
ConfigurationShared::SetColoredTristate(ui->accelerate_astc, Settings::values.accelerate_astc,
accelerate_astc);
ConfigurationShared::SetColoredTristate(ui->use_asynchronous_gpu_emulation,
@@ -316,4 +342,6 @@ void ConfigureGraphics::SetupPerGameUI() {
static_cast<int>(Settings::values.fullscreen_mode.GetValue(true)));
ConfigurationShared::InsertGlobalItem(
ui->api, static_cast<int>(Settings::values.renderer_backend.GetValue(true)));
ConfigurationShared::InsertGlobalItem(
ui->nvdec_emulation, static_cast<int>(Settings::values.nvdec_emulation.GetValue(true)));
}

View File

@@ -43,6 +43,7 @@ private:
void SetupPerGameUI();
Settings::RendererBackend GetCurrentGraphicsBackend() const;
Settings::NvdecEmulation GetCurrentNvdecEmulation() const;
std::unique_ptr<Ui::ConfigureGraphics> ui;
QColor bg_color;

View File

@@ -156,7 +156,7 @@
<item>
<widget class="QCheckBox" name="use_disk_shader_cache">
<property name="text">
<string>Use disk shader cache</string>
<string>Use disk pipeline cache</string>
</property>
</widget>
</item>
@@ -167,13 +167,6 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="use_nvdec_emulation">
<property name="text">
<string>Use NVDEC emulation</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="accelerate_astc">
<property name="text">
@@ -181,6 +174,50 @@
</property>
</widget>
</item>
<item>
<widget class="QWidget" name="nvdec_emulation_widget" native="true">
<layout class="QHBoxLayout" name="nvdec_emulation_layout">
<property name="leftMargin">
<number>0</number>
</property>
<property name="topMargin">
<number>0</number>
</property>
<property name="rightMargin">
<number>0</number>
</property>
<property name="bottomMargin">
<number>0</number>
</property>
<item>
<widget class="QLabel" name="nvdec_emulation_label">
<property name="text">
<string>NVDEC emulation:</string>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="nvdec_emulation">
<item>
<property name="text">
<string>Disabled</string>
</property>
</item>
<item>
<property name="text">
<string>CPU Decoding</string>
</property>
</item>
<item>
<property name="text">
<string>GPU Decoding</string>
</property>
</item>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QWidget" name="fullscreen_mode_layout" native="true">
<layout class="QHBoxLayout" name="horizontalLayout_1">

View File

@@ -82,7 +82,7 @@
<string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string>
</property>
<property name="text">
<string>Use asynchronous shader building (hack)</string>
<string>Use asynchronous shader building (Hack)</string>
</property>
</widget>
</item>
@@ -92,7 +92,7 @@
<string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string>
</property>
<property name="text">
<string>Use Fast GPU Time (hack)</string>
<string>Use Fast GPU Time (Hack)</string>
</property>
</widget>
</item>

View File

@@ -515,16 +515,16 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri
QAction* open_save_location = context_menu.addAction(tr("Open Save Data Location"));
QAction* open_mod_location = context_menu.addAction(tr("Open Mod Data Location"));
QAction* open_transferable_shader_cache =
context_menu.addAction(tr("Open Transferable Shader Cache"));
context_menu.addAction(tr("Open Transferable Pipeline Cache"));
context_menu.addSeparator();
QMenu* remove_menu = context_menu.addMenu(tr("Remove"));
QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update"));
QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC"));
QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration"));
QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Shader Cache"));
QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Shader Cache"));
QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Pipeline Cache"));
QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Pipeline Cache"));
remove_menu->addSeparator();
QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Shader Caches"));
QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Pipeline Caches"));
QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents"));
QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS"));
QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS"));

View File

@@ -3174,12 +3174,11 @@ std::optional<u64> GMainWindow::SelectRomFSDumpTarget(const FileSys::ContentProv
}
bool GMainWindow::ConfirmClose() {
if (emu_thread == nullptr || !UISettings::values.confirm_before_closing)
if (emu_thread == nullptr || !UISettings::values.confirm_before_closing) {
return true;
QMessageBox::StandardButton answer =
QMessageBox::question(this, tr("yuzu"), tr("Are you sure you want to close yuzu?"),
QMessageBox::Yes | QMessageBox::No, QMessageBox::No);
}
const auto text = tr("Are you sure you want to close yuzu?");
const auto answer = QMessageBox::question(this, tr("yuzu"), text);
return answer != QMessageBox::No;
}
@@ -3261,14 +3260,13 @@ bool GMainWindow::ConfirmChangeGame() {
}
bool GMainWindow::ConfirmForceLockedExit() {
if (emu_thread == nullptr)
if (emu_thread == nullptr || !UISettings::values.confirm_before_closing) {
return true;
}
const auto text = tr("The currently running application has requested yuzu to not exit.\n\n"
"Would you like to bypass this and exit anyway?");
const auto answer =
QMessageBox::question(this, tr("yuzu"),
tr("The currently running application has requested yuzu to not "
"exit.\n\nWould you like to bypass this and exit anyway?"),
QMessageBox::Yes | QMessageBox::No, QMessageBox::No);
const auto answer = QMessageBox::question(this, tr("yuzu"), text);
return answer != QMessageBox::No;
}

View File

@@ -465,7 +465,7 @@ void Config::ReadValues() {
ReadSetting("Renderer", Settings::values.disable_fps_limit);
ReadSetting("Renderer", Settings::values.shader_backend);
ReadSetting("Renderer", Settings::values.use_asynchronous_shaders);
ReadSetting("Renderer", Settings::values.use_nvdec_emulation);
ReadSetting("Renderer", Settings::values.nvdec_emulation);
ReadSetting("Renderer", Settings::values.accelerate_astc);
ReadSetting("Renderer", Settings::values.use_fast_gpu_time);

View File

@@ -261,9 +261,9 @@ shader_backend =
# 0 (default): Off, 1: On
use_asynchronous_shaders =
# Enable NVDEC emulation.
# 0: Off, 1 (default): On
use_nvdec_emulation =
# NVDEC emulation.
# 0: Disabled, 1: CPU Decoding, 2 (default): GPU Decoding
nvdec_emulation =
# Accelerate ASTC texture decoding.
# 0: Off, 1 (default): On