gl_shader_decompiler: Implement image binding settings

shader: Implement bindless images
shader: Decode SUST and implement backing image functionality
2019-05-16 20:03:51 -03:00 · 2019-05-16 20:03:51 -03:00 · 2019-05-16 20:03:51 -03:00 · 2019-05-16 20:03:51 -03:00 · 2019-05-16 20:03:50 -03:00 · 2019-05-16 20:03:50 -03:00
123 changed files with 2859 additions and 5247 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -132,7 +132,7 @@ find_package(Threads REQUIRED)
 if (ENABLE_SDL2)
    if (YUZU_USE_BUNDLED_SDL2)
        # Detect toolchain and platform
-        if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64)
+        if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) AND ARCHITECTURE_x86_64)
            set(SDL2_VER "SDL2-2.0.8")
        else()
            message(FATAL_ERROR "No bundled SDL2 binaries for your toolchain. Disable YUZU_USE_BUNDLED_SDL2 and provide your own.")
@@ -165,7 +165,7 @@ if (YUZU_USE_BUNDLED_UNICORN)
    if (MSVC)
        message(STATUS "unicorn not found, falling back to bundled")
        # Detect toolchain and platform
-        if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64)
+        if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) AND ARCHITECTURE_x86_64)
            set(UNICORN_VER "unicorn-yuzu")
        else()
            message(FATAL_ERROR "No bundled Unicorn binaries for your toolchain. Disable YUZU_USE_BUNDLED_UNICORN and provide your own.")
@@ -233,7 +233,7 @@ endif()

 if (ENABLE_QT)
    if (YUZU_USE_BUNDLED_QT)
-        if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1930) AND ARCHITECTURE_x86_64)
+        if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) AND ARCHITECTURE_x86_64)
            set(QT_VER qt-5.12.0-msvc2017_64)
        else()
            message(FATAL_ERROR "No bundled Qt binaries for your toolchain. Disable YUZU_USE_BUNDLED_QT and provide your own.")
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -70,6 +70,7 @@ set(HASH_FILES
    "${VIDEO_CORE}/shader/decode/half_set.cpp"
    "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
    "${VIDEO_CORE}/shader/decode/hfma2.cpp"
+    "${VIDEO_CORE}/shader/decode/image.cpp"
    "${VIDEO_CORE}/shader/decode/integer_set.cpp"
    "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
    "${VIDEO_CORE}/shader/decode/memory.cpp"
--- a/externals/glad/include/KHR/khrplatform.h
+++ b/externals/glad/include/KHR/khrplatform.h
@@ -90,20 +90,12 @@
 *                                  int arg2) KHRONOS_APIATTRIBUTES;
 */

-#if defined(__SCITECH_SNAP__) && !defined(KHRONOS_STATIC)
-#   define KHRONOS_STATIC 1
-#endif
-
 /*-------------------------------------------------------------------------
 * Definition of KHRONOS_APICALL
 *-------------------------------------------------------------------------
 * This precedes the return type of the function in the function prototype.
 */
-#if defined(KHRONOS_STATIC)
-    /* If the preprocessor constant KHRONOS_STATIC is defined, make the
-     * header compatible with static linking. */
-#   define KHRONOS_APICALL
-#elif defined(_WIN32)
+#if defined(_WIN32) && !defined(__SCITECH_SNAP__)
 #   define KHRONOS_APICALL __declspec(dllimport)
 #elif defined (__SYMBIAN32__)
 #   define KHRONOS_APICALL IMPORT_C
@@ -119,7 +111,7 @@
 * This follows the return type of the function  and precedes the function
 * name in the function prototype.
 */
-#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(KHRONOS_STATIC)
+#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(__SCITECH_SNAP__)
    /* Win32 but not WinCE */
 #   define KHRONOS_APIENTRY __stdcall
 #else
--- a/externals/glad/include/glad/glad.h
+++ b/externals/glad/include/glad/glad.h
--- a/externals/glad/src/glad.c
+++ b/externals/glad/src/glad.c
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -44,6 +44,7 @@ add_custom_command(OUTPUT scm_rev.cpp
      "${VIDEO_CORE}/shader/decode/half_set.cpp"
      "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
      "${VIDEO_CORE}/shader/decode/hfma2.cpp"
+      "${VIDEO_CORE}/shader/decode/image.cpp"
      "${VIDEO_CORE}/shader/decode/integer_set.cpp"
      "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
      "${VIDEO_CORE}/shader/decode/memory.cpp"
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -438,7 +438,7 @@ inline float RequestParser::Pop() {
 template <>
 inline double RequestParser::Pop() {
    const u64 value = Pop<u64>();
-    double real;
+    float real;
    std::memcpy(&real, &value, sizeof(real));
    return real;
 }
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -43,7 +43,7 @@ void SessionRequestHandler::ClientDisconnected(const SharedPtr<ServerSession>& s
 }

 SharedPtr<WritableEvent> HLERequestContext::SleepClientThread(
-    const std::string& reason, u64 timeout, WakeupCallback&& callback,
+    SharedPtr<Thread> thread, const std::string& reason, u64 timeout, WakeupCallback&& callback,
    SharedPtr<WritableEvent> writable_event) {
    // Put the client thread to sleep until the wait event is signaled or the timeout expires.
    thread->SetWakeupCallback([context = *this, callback](
@@ -58,7 +58,7 @@ SharedPtr<WritableEvent> HLERequestContext::SleepClientThread(
    auto& kernel = Core::System::GetInstance().Kernel();
    if (!writable_event) {
        // Create event if not provided
-        const auto pair = WritableEvent::CreateEventPair(kernel, ResetType::Automatic,
+        const auto pair = WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
                                                         "HLE Pause Event: " + reason);
        writable_event = pair.writable;
    }
@@ -76,9 +76,8 @@ SharedPtr<WritableEvent> HLERequestContext::SleepClientThread(
    return writable_event;
 }

-HLERequestContext::HLERequestContext(SharedPtr<Kernel::ServerSession> server_session,
-                                     SharedPtr<Thread> thread)
-    : server_session(std::move(server_session)), thread(std::move(thread)) {
+HLERequestContext::HLERequestContext(SharedPtr<Kernel::ServerSession> server_session)
+    : server_session(std::move(server_session)) {
    cmd_buf[0] = 0;
 }

--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -97,7 +97,7 @@ protected:
 */
 class HLERequestContext {
 public:
-    explicit HLERequestContext(SharedPtr<ServerSession> session, SharedPtr<Thread> thread);
+    explicit HLERequestContext(SharedPtr<ServerSession> session);
    ~HLERequestContext();

    /// Returns a pointer to the IPC command buffer for this request.
@@ -119,6 +119,7 @@ public:
    /**
     * Puts the specified guest thread to sleep until the returned event is signaled or until the
     * specified timeout expires.
+     * @param thread Thread to be put to sleep.
     * @param reason Reason for pausing the thread, to be used for debugging purposes.
     * @param timeout Timeout in nanoseconds after which the thread will be awoken and the callback
     * invoked with a Timeout reason.
@@ -129,8 +130,8 @@ public:
     * created.
     * @returns Event that when signaled will resume the thread and call the callback function.
     */
-    SharedPtr<WritableEvent> SleepClientThread(const std::string& reason, u64 timeout,
-                                               WakeupCallback&& callback,
+    SharedPtr<WritableEvent> SleepClientThread(SharedPtr<Thread> thread, const std::string& reason,
+                                               u64 timeout, WakeupCallback&& callback,
                                               SharedPtr<WritableEvent> writable_event = nullptr);

    /// Populates this context with data from the requesting process/thread.
@@ -267,7 +268,6 @@ private:

    std::array<u32, IPC::COMMAND_BUFFER_LENGTH> cmd_buf;
    SharedPtr<Kernel::ServerSession> server_session;
-    SharedPtr<Thread> thread;
    // TODO(yuriks): Check common usage of this and optimize size accordingly
    boost::container::small_vector<SharedPtr<Object>, 8> move_objects;
    boost::container::small_vector<SharedPtr<Object>, 8> copy_objects;
--- a/src/core/hle/kernel/object.h
+++ b/src/core/hle/kernel/object.h
@@ -33,8 +33,8 @@ enum class HandleType : u32 {
 };

 enum class ResetType {
-    Automatic, ///< Reset automatically on object acquisition
-    Manual,    ///< Never reset automatically
+    OneShot, ///< Reset automatically on object acquisition
+    Sticky,  ///< Never reset automatically
 };

 class Object : NonCopyable {
--- a/src/core/hle/kernel/readable_event.cpp
+++ b/src/core/hle/kernel/readable_event.cpp
@@ -21,9 +21,8 @@ bool ReadableEvent::ShouldWait(const Thread* thread) const {
 void ReadableEvent::Acquire(Thread* thread) {
    ASSERT_MSG(!ShouldWait(thread), "object unavailable!");

-    if (reset_type == ResetType::Automatic) {
+    if (reset_type == ResetType::OneShot)
        signaled = false;
-    }
 }

 void ReadableEvent::Signal() {
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -130,7 +130,7 @@ ResultCode ServerSession::HandleSyncRequest(SharedPtr<Thread> thread) {
    // The ServerSession received a sync request, this means that there's new data available
    // from its ClientSession, so wake up any threads that may be waiting on a svcReplyAndReceive or
    // similar.
-    Kernel::HLERequestContext context(this, thread);
+    Kernel::HLERequestContext context(this);
    u32* cmd_buf = (u32*)Memory::GetPointer(thread->GetTLSAddress());
    context.PopulateFromIncomingCommandBuffer(kernel.CurrentProcess()->GetHandleTable(), cmd_buf);

--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1255,8 +1255,8 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
    return vm_manager.MapCodeMemory(dst_address, src_address, size);
 }

-static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_handle,
-                                         u64 dst_address, u64 src_address, u64 size) {
+ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_handle, u64 dst_address,
+                                  u64 src_address, u64 size) {
    LOG_DEBUG(Kernel_SVC,
              "called. process_handle=0x{:08X}, dst_address=0x{:016X}, src_address=0x{:016X}, "
              "size=0x{:016X}",
@@ -1342,7 +1342,7 @@ static void ExitProcess(Core::System& system) {
 /// Creates a new thread
 static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr entry_point, u64 arg,
                               VAddr stack_top, u32 priority, s32 processor_id) {
-    LOG_DEBUG(Kernel_SVC,
+    LOG_TRACE(Kernel_SVC,
              "called entrypoint=0x{:08X}, arg=0x{:08X}, stacktop=0x{:08X}, "
              "threadpriority=0x{:08X}, processorid=0x{:08X} : created handle=0x{:08X}",
              entry_point, arg, stack_top, priority, processor_id, *out_handle);
@@ -1402,7 +1402,7 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e

 /// Starts the thread for the provided handle
 static ResultCode StartThread(Core::System& system, Handle thread_handle) {
-    LOG_DEBUG(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
+    LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle);

    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
    const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
@@ -1425,7 +1425,7 @@ static ResultCode StartThread(Core::System& system, Handle thread_handle) {

 /// Called when a thread exits
 static void ExitThread(Core::System& system) {
-    LOG_DEBUG(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
+    LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());

    auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
    current_thread->Stop();
@@ -1435,7 +1435,7 @@ static void ExitThread(Core::System& system) {

 /// Sleep the current thread
 static void SleepThread(Core::System& system, s64 nanoseconds) {
-    LOG_DEBUG(Kernel_SVC, "called nanoseconds={}", nanoseconds);
+    LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds);

    enum class SleepType : s64 {
        YieldWithoutLoadBalancing = 0,
@@ -1880,51 +1880,11 @@ static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle,
 }

 static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, u32 core,
-                                    u64 affinity_mask) {
-    LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, core=0x{:X}, affinity_mask=0x{:016X}",
-              thread_handle, core, affinity_mask);
+                                    u64 mask) {
+    LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, mask=0x{:016X}, core=0x{:X}", thread_handle,
+              mask, core);

-    const auto* const current_process = system.Kernel().CurrentProcess();
-
-    if (core == static_cast<u32>(THREADPROCESSORID_IDEAL)) {
-        const u8 ideal_cpu_core = current_process->GetIdealCore();
-
-        ASSERT(ideal_cpu_core != static_cast<u8>(THREADPROCESSORID_IDEAL));
-
-        // Set the target CPU to the ideal core specified by the process.
-        core = ideal_cpu_core;
-        affinity_mask = 1ULL << core;
-    } else {
-        const u64 core_mask = current_process->GetCoreMask();
-
-        if ((core_mask | affinity_mask) != core_mask) {
-            LOG_ERROR(
-                Kernel_SVC,
-                "Invalid processor ID specified (core_mask=0x{:08X}, affinity_mask=0x{:016X})",
-                core_mask, affinity_mask);
-            return ERR_INVALID_PROCESSOR_ID;
-        }
-
-        if (affinity_mask == 0) {
-            LOG_ERROR(Kernel_SVC, "Specfified affinity mask is zero.");
-            return ERR_INVALID_COMBINATION;
-        }
-
-        if (core < Core::NUM_CPU_CORES) {
-            if ((affinity_mask & (1ULL << core)) == 0) {
-                LOG_ERROR(Kernel_SVC,
-                          "Core is not enabled for the current mask, core={}, mask={:016X}", core,
-                          affinity_mask);
-                return ERR_INVALID_COMBINATION;
-            }
-        } else if (core != static_cast<u32>(THREADPROCESSORID_DONT_CARE) &&
-                   core != static_cast<u32>(THREADPROCESSORID_DONT_UPDATE)) {
-            LOG_ERROR(Kernel_SVC, "Invalid processor ID specified (core={}).", core);
-            return ERR_INVALID_PROCESSOR_ID;
-        }
-    }
-
-    const auto& handle_table = current_process->GetHandleTable();
+    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
    const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle);
    if (!thread) {
        LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
@@ -1932,7 +1892,40 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle,
        return ERR_INVALID_HANDLE;
    }

-    thread->ChangeCore(core, affinity_mask);
+    if (core == static_cast<u32>(THREADPROCESSORID_IDEAL)) {
+        const u8 ideal_cpu_core = thread->GetOwnerProcess()->GetIdealCore();
+
+        ASSERT(ideal_cpu_core != static_cast<u8>(THREADPROCESSORID_IDEAL));
+
+        // Set the target CPU to the ideal core specified by the process.
+        core = ideal_cpu_core;
+        mask = 1ULL << core;
+    }
+
+    if (mask == 0) {
+        LOG_ERROR(Kernel_SVC, "Mask is 0");
+        return ERR_INVALID_COMBINATION;
+    }
+
+    /// This value is used to only change the affinity mask without changing the current ideal core.
+    static constexpr u32 OnlyChangeMask = static_cast<u32>(-3);
+
+    if (core == OnlyChangeMask) {
+        core = thread->GetIdealCore();
+    } else if (core >= Core::NUM_CPU_CORES && core != static_cast<u32>(-1)) {
+        LOG_ERROR(Kernel_SVC, "Invalid core specified, got {}", core);
+        return ERR_INVALID_PROCESSOR_ID;
+    }
+
+    // Error out if the input core isn't enabled in the input mask.
+    if (core < Core::NUM_CPU_CORES && (mask & (1ull << core)) == 0) {
+        LOG_ERROR(Kernel_SVC, "Core is not enabled for the current mask, core={}, mask={:016X}",
+                  core, mask);
+        return ERR_INVALID_COMBINATION;
+    }
+
+    thread->ChangeCore(core, mask);
+
    return RESULT_SUCCESS;
 }

@@ -1987,7 +1980,7 @@ static ResultCode CreateEvent(Core::System& system, Handle* write_handle, Handle

    auto& kernel = system.Kernel();
    const auto [readable_event, writable_event] =
-        WritableEvent::CreateEventPair(kernel, ResetType::Manual, "CreateEvent");
+        WritableEvent::CreateEventPair(kernel, ResetType::Sticky, "CreateEvent");

    HandleTable& handle_table = kernel.CurrentProcess()->GetHandleTable();

@@ -2190,8 +2183,8 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes,
    return RESULT_SUCCESS;
 }

-static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAddr out_thread_ids,
-                                u32 out_thread_ids_size, Handle debug_handle) {
+ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAddr out_thread_ids,
+                         u32 out_thread_ids_size, Handle debug_handle) {
    // TODO: Handle this case when debug events are supported.
    UNIMPLEMENTED_IF(debug_handle != InvalidHandle);

--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -30,21 +30,12 @@ enum ThreadPriority : u32 {
 };

 enum ThreadProcessorId : s32 {
-    /// Indicates that no particular processor core is preferred.
-    THREADPROCESSORID_DONT_CARE = -1,
-
-    /// Run thread on the ideal core specified by the process.
-    THREADPROCESSORID_IDEAL = -2,
-
-    /// Indicates that the preferred processor ID shouldn't be updated in
-    /// a core mask setting operation.
-    THREADPROCESSORID_DONT_UPDATE = -3,
-
-    THREADPROCESSORID_0 = 0,   ///< Run thread on core 0
-    THREADPROCESSORID_1 = 1,   ///< Run thread on core 1
-    THREADPROCESSORID_2 = 2,   ///< Run thread on core 2
-    THREADPROCESSORID_3 = 3,   ///< Run thread on core 3
-    THREADPROCESSORID_MAX = 4, ///< Processor ID must be less than this
+    THREADPROCESSORID_IDEAL = -2, ///< Run thread on the ideal core specified by the process.
+    THREADPROCESSORID_0 = 0,      ///< Run thread on core 0
+    THREADPROCESSORID_1 = 1,      ///< Run thread on core 1
+    THREADPROCESSORID_2 = 2,      ///< Run thread on core 2
+    THREADPROCESSORID_3 = 3,      ///< Run thread on core 3
+    THREADPROCESSORID_MAX = 4,    ///< Processor ID must be less than this

    /// Allowed CPU mask
    THREADPROCESSORID_DEFAULT_MASK = (1 << THREADPROCESSORID_0) | (1 << THREADPROCESSORID_1) |
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -276,7 +276,7 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger
    RegisterHandlers(functions);

    auto& kernel = Core::System::GetInstance().Kernel();
-    launchable_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
+    launchable_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
                                                              "ISelfController:LaunchableEvent");
 }

@@ -442,10 +442,10 @@ void ISelfController::GetIdleTimeDetectionExtension(Kernel::HLERequestContext& c

 AppletMessageQueue::AppletMessageQueue() {
    auto& kernel = Core::System::GetInstance().Kernel();
-    on_new_message = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
+    on_new_message = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
                                                            "AMMessageQueue:OnMessageRecieved");
    on_operation_mode_changed = Kernel::WritableEvent::CreateEventPair(
-        kernel, Kernel::ResetType::Automatic, "AMMessageQueue:OperationModeChanged");
+        kernel, Kernel::ResetType::OneShot, "AMMessageQueue:OperationModeChanged");
 }

 AppletMessageQueue::~AppletMessageQueue() = default;
@@ -835,7 +835,6 @@ void IStorageAccessor::Write(Kernel::HLERequestContext& ctx) {

        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(ERR_SIZE_OUT_OF_BOUNDS);
-        return;
    }

    std::memcpy(backing.buffer.data() + offset, data.data(), data.size());
@@ -858,7 +857,6 @@ void IStorageAccessor::Read(Kernel::HLERequestContext& ctx) {

        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(ERR_SIZE_OUT_OF_BOUNDS);
-        return;
    }

    ctx.WriteBuffer(backing.buffer.data() + offset, size);
--- a/src/core/hle/service/am/applets/applets.cpp
+++ b/src/core/hle/service/am/applets/applets.cpp
@@ -26,11 +26,11 @@ namespace Service::AM::Applets {
 AppletDataBroker::AppletDataBroker() {
    auto& kernel = Core::System::GetInstance().Kernel();
    state_changed_event = Kernel::WritableEvent::CreateEventPair(
-        kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:StateChangedEvent");
+        kernel, Kernel::ResetType::Sticky, "ILibraryAppletAccessor:StateChangedEvent");
    pop_out_data_event = Kernel::WritableEvent::CreateEventPair(
-        kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:PopDataOutEvent");
+        kernel, Kernel::ResetType::Sticky, "ILibraryAppletAccessor:PopDataOutEvent");
    pop_interactive_out_data_event = Kernel::WritableEvent::CreateEventPair(
-        kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:PopInteractiveDataOutEvent");
+        kernel, Kernel::ResetType::Sticky, "ILibraryAppletAccessor:PopInteractiveDataOutEvent");
 }

 AppletDataBroker::~AppletDataBroker() = default;
--- a/src/core/hle/service/aoc/aoc_u.cpp
+++ b/src/core/hle/service/aoc/aoc_u.cpp
@@ -68,7 +68,7 @@ AOC_U::AOC_U() : ServiceFramework("aoc:u"), add_on_content(AccumulateAOCTitleIDs
    RegisterHandlers(functions);

    auto& kernel = Core::System::GetInstance().Kernel();
-    aoc_change_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
+    aoc_change_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
                                                              "GetAddOnContentListChanged:Event");
 }

--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -67,7 +67,7 @@ public:
        // This is the event handle used to check if the audio buffer was released
        auto& system = Core::System::GetInstance();
        buffer_event = Kernel::WritableEvent::CreateEventPair(
-            system.Kernel(), Kernel::ResetType::Manual, "IAudioOutBufferReleased");
+            system.Kernel(), Kernel::ResetType::Sticky, "IAudioOutBufferReleased");

        stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate,
                                       audio_params.channel_count, std::move(unique_name),
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -8,7 +8,6 @@

 #include "audio_core/audio_renderer.h"
 #include "common/alignment.h"
-#include "common/bit_util.h"
 #include "common/common_funcs.h"
 #include "common/logging/log.h"
 #include "common/string_util.h"
@@ -47,7 +46,7 @@ public:

        auto& system = Core::System::GetInstance();
        system_event = Kernel::WritableEvent::CreateEventPair(
-            system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent");
+            system.Kernel(), Kernel::ResetType::Sticky, "IAudioRenderer:SystemEvent");
        renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params,
                                                              system_event.writable);
    }
@@ -179,7 +178,7 @@ public:
        RegisterHandlers(functions);

        auto& kernel = Core::System::GetInstance().Kernel();
-        buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
+        buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
                                                              "IAudioOutBufferReleasedEvent");
    }

@@ -263,304 +262,64 @@ void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) {
    OpenAudioRendererImpl(ctx);
 }

-static u64 CalculateNumPerformanceEntries(const AudioCore::AudioRendererParameter& params) {
-    // +1 represents the final mix.
-    return u64{params.effect_count} + params.submix_count + params.sink_count + params.voice_count +
-           1;
-}
-
 void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp{ctx};
+    auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
    LOG_DEBUG(Service_Audio, "called");

-    // Several calculations below align the sizes being calculated
-    // onto a 64 byte boundary.
-    static constexpr u64 buffer_alignment_size = 64;
+    u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
+    buffer_sz += params.submix_count * 1024;
+    buffer_sz += 0x940 * (params.submix_count + 1);
+    buffer_sz += 0x3F0 * params.voice_count;
+    buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10);
+    buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
+    buffer_sz += Common::AlignUp(
+        (0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) *
+            (params.mix_buffer_count + 6),
+        0x40);

-    // Some calculations that calculate portions of the buffer
-    // that will contain information, on the other hand, align
-    // the result of some of their calcularions on a 16 byte boundary.
-    static constexpr u64 info_field_alignment_size = 16;
-
-    // Maximum detail entries that may exist at one time for performance
-    // frame statistics.
-    static constexpr u64 max_perf_detail_entries = 100;
-
-    // Size of the data structure representing the bulk of the voice-related state.
-    static constexpr u64 voice_state_size = 0x100;
-
-    // Size of the upsampler manager data structure
-    constexpr u64 upsampler_manager_size = 0x48;
-
-    // Calculates the part of the size that relates to mix buffers.
-    const auto calculate_mix_buffer_sizes = [](const AudioCore::AudioRendererParameter& params) {
-        // As of 8.0.0 this is the maximum on voice channels.
-        constexpr u64 max_voice_channels = 6;
-
-        // The service expects the sample_count member of the parameters to either be
-        // a value of 160 or 240, so the maximum sample count is assumed in order
-        // to adequately handle all values at runtime.
-        constexpr u64 default_max_sample_count = 240;
-
-        const u64 total_mix_buffers = params.mix_buffer_count + max_voice_channels;
-
-        u64 size = 0;
-        size += total_mix_buffers * (sizeof(s32) * params.sample_count);
-        size += total_mix_buffers * (sizeof(s32) * default_max_sample_count);
-        size += u64{params.submix_count} + params.sink_count;
-        size = Common::AlignUp(size, buffer_alignment_size);
-        size += Common::AlignUp(params.unknown_30, buffer_alignment_size);
-        size += Common::AlignUp(sizeof(s32) * params.mix_buffer_count, buffer_alignment_size);
-        return size;
-    };
-
-    // Calculates the portion of the size related to the mix data (and the sorting thereof).
-    const auto calculate_mix_info_size = [this](const AudioCore::AudioRendererParameter& params) {
-        // The size of the mixing info data structure.
-        constexpr u64 mix_info_size = 0x940;
-
-        // Consists of total submixes with the final mix included.
-        const u64 total_mix_count = u64{params.submix_count} + 1;
-
-        // The total number of effects that may be available to the audio renderer at any time.
-        constexpr u64 max_effects = 256;
-
-        // Calculates the part of the size related to the audio node state.
-        // This will only be used if the audio revision supports the splitter.
-        const auto calculate_node_state_size = [](std::size_t num_nodes) {
-            // Internally within a nodestate, it appears to use a data structure
-            // similar to a std::bitset<64> twice.
-            constexpr u64 bit_size = Common::BitSize<u64>();
-            constexpr u64 num_bitsets = 2;
-
-            // Node state instances have three states internally for performing
-            // depth-first searches of nodes. Initialized, Found, and Done Sorting.
-            constexpr u64 num_states = 3;
-
-            u64 size = 0;
-            size += (num_nodes * num_nodes) * sizeof(s32);
-            size += num_states * (num_nodes * sizeof(s32));
-            size += num_bitsets * (Common::AlignUp(num_nodes, bit_size) / Common::BitSize<u8>());
-            return size;
-        };
-
-        // Calculates the part of the size related to the adjacency (aka edge) matrix.
-        const auto calculate_edge_matrix_size = [](std::size_t num_nodes) {
-            return (num_nodes * num_nodes) * sizeof(s32);
-        };
-
-        u64 size = 0;
-        size += Common::AlignUp(sizeof(void*) * total_mix_count, info_field_alignment_size);
-        size += Common::AlignUp(mix_info_size * total_mix_count, info_field_alignment_size);
-        size += Common::AlignUp(sizeof(s32) * max_effects * params.submix_count,
-                                info_field_alignment_size);
-
-        if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
-            size += Common::AlignUp(calculate_node_state_size(total_mix_count) +
-                                        calculate_edge_matrix_size(total_mix_count),
-                                    info_field_alignment_size);
+    if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
+        const u32 count = params.submix_count + 1;
+        u64 node_count = Common::AlignUp(count, 0x40);
+        const u64 node_state_buffer_sz =
+            4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
+        u64 edge_matrix_buffer_sz = 0;
+        node_count = Common::AlignUp(count * count, 0x40);
+        if (node_count >> 31 != 0) {
+            edge_matrix_buffer_sz = (node_count | 7) / 8;
+        } else {
+            edge_matrix_buffer_sz = node_count / 8;
        }
+        buffer_sz += Common::AlignUp(node_state_buffer_sz + edge_matrix_buffer_sz, 0x10);
+    }

-        return size;
-    };
+    buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
+    if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
+        buffer_sz += 0xE0 * params.num_splitter_send_channels;
+        buffer_sz += 0x20 * params.splitter_count;
+        buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10);
+    }
+    buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
+    u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
+                    ((params.voice_count * 256) | 0x40);

-    // Calculates the part of the size related to voice channel info.
-    const auto calculate_voice_info_size = [](const AudioCore::AudioRendererParameter& params) {
-        constexpr u64 voice_info_size = 0x220;
-        constexpr u64 voice_resource_size = 0xD0;
-
-        u64 size = 0;
-        size += Common::AlignUp(sizeof(void*) * params.voice_count, info_field_alignment_size);
-        size += Common::AlignUp(voice_info_size * params.voice_count, info_field_alignment_size);
-        size +=
-            Common::AlignUp(voice_resource_size * params.voice_count, info_field_alignment_size);
-        size += Common::AlignUp(voice_state_size * params.voice_count, info_field_alignment_size);
-        return size;
-    };
-
-    // Calculates the part of the size related to memory pools.
-    const auto calculate_memory_pools_size = [](const AudioCore::AudioRendererParameter& params) {
-        const u64 num_memory_pools = sizeof(s32) * (u64{params.effect_count} + params.voice_count);
-        const u64 memory_pool_info_size = 0x20;
-        return Common::AlignUp(num_memory_pools * memory_pool_info_size, info_field_alignment_size);
-    };
-
-    // Calculates the part of the size related to the splitter context.
-    const auto calculate_splitter_context_size =
-        [this](const AudioCore::AudioRendererParameter& params) -> u64 {
-        if (!IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
-            return 0;
-        }
-
-        constexpr u64 splitter_info_size = 0x20;
-        constexpr u64 splitter_destination_data_size = 0xE0;
-
-        u64 size = 0;
-        size += params.num_splitter_send_channels;
-        size +=
-            Common::AlignUp(splitter_info_size * params.splitter_count, info_field_alignment_size);
-        size += Common::AlignUp(splitter_destination_data_size * params.num_splitter_send_channels,
-                                info_field_alignment_size);
-
-        return size;
-    };
-
-    // Calculates the part of the size related to the upsampler info.
-    const auto calculate_upsampler_info_size = [](const AudioCore::AudioRendererParameter& params) {
-        constexpr u64 upsampler_info_size = 0x280;
-        // Yes, using the buffer size over info alignment size is intentional here.
-        return Common::AlignUp(upsampler_info_size * (u64{params.submix_count} + params.sink_count),
-                               buffer_alignment_size);
-    };
-
-    // Calculates the part of the size related to effect info.
-    const auto calculate_effect_info_size = [](const AudioCore::AudioRendererParameter& params) {
-        constexpr u64 effect_info_size = 0x2B0;
-        return Common::AlignUp(effect_info_size * params.effect_count, info_field_alignment_size);
-    };
-
-    // Calculates the part of the size related to audio sink info.
-    const auto calculate_sink_info_size = [](const AudioCore::AudioRendererParameter& params) {
-        const u64 sink_info_size = 0x170;
-        return Common::AlignUp(sink_info_size * params.sink_count, info_field_alignment_size);
-    };
-
-    // Calculates the part of the size related to voice state info.
-    const auto calculate_voice_state_size = [](const AudioCore::AudioRendererParameter& params) {
-        const u64 voice_state_size = 0x100;
-        const u64 additional_size = buffer_alignment_size - 1;
-        return Common::AlignUp(voice_state_size * params.voice_count + additional_size,
-                               info_field_alignment_size);
-    };
-
-    // Calculates the part of the size related to performance statistics.
-    const auto calculate_perf_size = [this](const AudioCore::AudioRendererParameter& params) {
-        // Extra size value appended to the end of the calculation.
-        constexpr u64 appended = 128;
-
-        // Whether or not we assume the newer version of performance metrics data structures.
-        const bool is_v2 =
-            IsFeatureSupported(AudioFeatures::PerformanceMetricsVersion2, params.revision);
-
-        // Data structure sizes
-        constexpr u64 perf_statistics_size = 0x0C;
-        const u64 header_size = is_v2 ? 0x30 : 0x18;
-        const u64 entry_size = is_v2 ? 0x18 : 0x10;
-        const u64 detail_size = is_v2 ? 0x18 : 0x10;
-
-        const u64 entry_count = CalculateNumPerformanceEntries(params);
-        const u64 size_per_frame =
-            header_size + (entry_size * entry_count) + (detail_size * max_perf_detail_entries);
-
-        u64 size = 0;
-        size += Common::AlignUp(size_per_frame * params.performance_frame_count + 1,
-                                buffer_alignment_size);
-        size += Common::AlignUp(perf_statistics_size, buffer_alignment_size);
-        size += appended;
-        return size;
-    };
-
-    // Calculates the part of the size that relates to the audio command buffer.
-    const auto calculate_command_buffer_size =
-        [this](const AudioCore::AudioRendererParameter& params) {
-            constexpr u64 alignment = (buffer_alignment_size - 1) * 2;
-
-            if (!IsFeatureSupported(AudioFeatures::VariadicCommandBuffer, params.revision)) {
-                constexpr u64 command_buffer_size = 0x18000;
-
-                return command_buffer_size + alignment;
-            }
-
-            // When the variadic command buffer is supported, this means
-            // the command generator for the audio renderer can issue commands
-            // that are (as one would expect), variable in size. So what we need to do
-            // is determine the maximum possible size for a few command data structures
-            // then multiply them by the amount of present commands indicated by the given
-            // respective audio parameters.
-
-            constexpr u64 max_biquad_filters = 2;
-            constexpr u64 max_mix_buffers = 24;
-
-            constexpr u64 biquad_filter_command_size = 0x2C;
-
-            constexpr u64 depop_mix_command_size = 0x24;
-            constexpr u64 depop_setup_command_size = 0x50;
-
-            constexpr u64 effect_command_max_size = 0x540;
-
-            constexpr u64 mix_command_size = 0x1C;
-            constexpr u64 mix_ramp_command_size = 0x24;
-            constexpr u64 mix_ramp_grouped_command_size = 0x13C;
-
-            constexpr u64 perf_command_size = 0x28;
-
-            constexpr u64 sink_command_size = 0x130;
-
-            constexpr u64 submix_command_max_size =
-                depop_mix_command_size + (mix_command_size * max_mix_buffers) * max_mix_buffers;
-
-            constexpr u64 volume_command_size = 0x1C;
-            constexpr u64 volume_ramp_command_size = 0x20;
-
-            constexpr u64 voice_biquad_filter_command_size =
-                biquad_filter_command_size * max_biquad_filters;
-            constexpr u64 voice_data_command_size = 0x9C;
-            const u64 voice_command_max_size =
-                (params.splitter_count * depop_setup_command_size) +
-                (voice_data_command_size + voice_biquad_filter_command_size +
-                 volume_ramp_command_size + mix_ramp_grouped_command_size);
-
-            // Now calculate the individual elements that comprise the size and add them together.
-            const u64 effect_commands_size = params.effect_count * effect_command_max_size;
-
-            const u64 final_mix_commands_size =
-                depop_mix_command_size + volume_command_size * max_mix_buffers;
-
-            const u64 perf_commands_size =
-                perf_command_size *
-                (CalculateNumPerformanceEntries(params) + max_perf_detail_entries);
-
-            const u64 sink_commands_size = params.sink_count * sink_command_size;
-
-            const u64 splitter_commands_size =
-                params.num_splitter_send_channels * max_mix_buffers * mix_ramp_command_size;
-
-            const u64 submix_commands_size = params.submix_count * submix_command_max_size;
-
-            const u64 voice_commands_size = params.voice_count * voice_command_max_size;
-
-            return effect_commands_size + final_mix_commands_size + perf_commands_size +
-                   sink_commands_size + splitter_commands_size + submix_commands_size +
-                   voice_commands_size + alignment;
-        };
-
-    IPC::RequestParser rp{ctx};
-    const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
-
-    u64 size = 0;
-    size += calculate_mix_buffer_sizes(params);
-    size += calculate_mix_info_size(params);
-    size += calculate_voice_info_size(params);
-    size += upsampler_manager_size;
-    size += calculate_memory_pools_size(params);
-    size += calculate_splitter_context_size(params);
-
-    size = Common::AlignUp(size, buffer_alignment_size);
-
-    size += calculate_upsampler_info_size(params);
-    size += calculate_effect_info_size(params);
-    size += calculate_sink_info_size(params);
-    size += calculate_voice_state_size(params);
-    size += calculate_perf_size(params);
-    size += calculate_command_buffer_size(params);
-
-    // finally, 4KB page align the size, and we're done.
-    size = Common::AlignUp(size, 4096);
+    if (params.performance_frame_count >= 1) {
+        output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
+                                      16 * params.voice_count + 16) +
+                                     0x658) *
+                                            (params.performance_frame_count + 1) +
+                                        0xc0,
+                                    0x40) +
+                    output_sz;
+    }
+    output_sz = Common::AlignUp(output_sz + 0x1807e, 0x1000);

    IPC::ResponseBuilder rb{ctx, 4};
-    rb.Push(RESULT_SUCCESS);
-    rb.Push<u64>(size);

-    LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", size);
+    rb.Push(RESULT_SUCCESS);
+    rb.Push<u64>(output_sz);
+
+    LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", output_sz);
 }

 void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
@@ -598,15 +357,10 @@ void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
 }

 bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
-    // Byte swap
-    const u32_be version_num = revision - Common::MakeMagic('R', 'E', 'V', '0');
-
+    u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
    switch (feature) {
    case AudioFeatures::Splitter:
-        return version_num >= 2U;
-    case AudioFeatures::PerformanceMetricsVersion2:
-    case AudioFeatures::VariadicCommandBuffer:
-        return version_num >= 5U;
+        return version_num >= 2u;
    default:
        return false;
    }
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -28,8 +28,6 @@ private:

    enum class AudioFeatures : u32 {
        Splitter,
-        PerformanceMetricsVersion2,
-        VariadicCommandBuffer,
    };

    bool IsFeatureSupported(AudioFeatures feature, u32_le revision) const;
--- a/src/core/hle/service/btdrv/btdrv.cpp
+++ b/src/core/hle/service/btdrv/btdrv.cpp
@@ -34,8 +34,8 @@ public:
        RegisterHandlers(functions);

        auto& kernel = Core::System::GetInstance().Kernel();
-        register_event = Kernel::WritableEvent::CreateEventPair(
-            kernel, Kernel::ResetType::Automatic, "BT:RegisterEvent");
+        register_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
+                                                                "BT:RegisterEvent");
    }

 private:
--- a/src/core/hle/service/btm/btm.cpp
+++ b/src/core/hle/service/btm/btm.cpp
@@ -57,13 +57,13 @@ public:
        RegisterHandlers(functions);

        auto& kernel = Core::System::GetInstance().Kernel();
-        scan_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
+        scan_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
                                                            "IBtmUserCore:ScanEvent");
        connection_event = Kernel::WritableEvent::CreateEventPair(
-            kernel, Kernel::ResetType::Automatic, "IBtmUserCore:ConnectionEvent");
+            kernel, Kernel::ResetType::OneShot, "IBtmUserCore:ConnectionEvent");
        service_discovery = Kernel::WritableEvent::CreateEventPair(
-            kernel, Kernel::ResetType::Automatic, "IBtmUserCore:Discovery");
-        config_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
+            kernel, Kernel::ResetType::OneShot, "IBtmUserCore:Discovery");
+        config_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
                                                              "IBtmUserCore:ConfigEvent");
    }

--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -170,7 +170,7 @@ void Controller_NPad::InitNewlyAddedControler(std::size_t controller_idx) {
 void Controller_NPad::OnInit() {
    auto& kernel = Core::System::GetInstance().Kernel();
    styleset_changed_event = Kernel::WritableEvent::CreateEventPair(
-        kernel, Kernel::ResetType::Automatic, "npad:NpadStyleSetChanged");
+        kernel, Kernel::ResetType::OneShot, "npad:NpadStyleSetChanged");

    if (!IsControllerActivated()) {
        return;
--- a/src/core/hle/service/nfp/nfp.cpp
+++ b/src/core/hle/service/nfp/nfp.cpp
@@ -26,7 +26,7 @@ constexpr ResultCode ERR_NO_APPLICATION_AREA(ErrorModule::NFP, 152);
 Module::Interface::Interface(std::shared_ptr<Module> module, const char* name)
    : ServiceFramework(name), module(std::move(module)) {
    auto& kernel = Core::System::GetInstance().Kernel();
-    nfc_tag_load = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
+    nfc_tag_load = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
                                                          "IUser:NFCTagDetected");
 }

@@ -67,9 +67,9 @@ public:

        auto& kernel = Core::System::GetInstance().Kernel();
        deactivate_event = Kernel::WritableEvent::CreateEventPair(
-            kernel, Kernel::ResetType::Automatic, "IUser:DeactivateEvent");
+            kernel, Kernel::ResetType::OneShot, "IUser:DeactivateEvent");
        availability_change_event = Kernel::WritableEvent::CreateEventPair(
-            kernel, Kernel::ResetType::Automatic, "IUser:AvailabilityChangeEvent");
+            kernel, Kernel::ResetType::OneShot, "IUser:AvailabilityChangeEvent");
    }

 private:
--- a/src/core/hle/service/nifm/nifm.cpp
+++ b/src/core/hle/service/nifm/nifm.cpp
@@ -62,9 +62,9 @@ public:
        RegisterHandlers(functions);

        auto& kernel = Core::System::GetInstance().Kernel();
-        event1 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
+        event1 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
                                                        "IRequest:Event1");
-        event2 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
+        event2 = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
                                                        "IRequest:Event2");
    }

--- a/src/core/hle/service/nim/nim.cpp
+++ b/src/core/hle/service/nim/nim.cpp
@@ -141,7 +141,7 @@ public:

        auto& kernel = Core::System::GetInstance().Kernel();
        finished_event = Kernel::WritableEvent::CreateEventPair(
-            kernel, Kernel::ResetType::Automatic,
+            kernel, Kernel::ResetType::OneShot,
            "IEnsureNetworkClockAvailabilityService:FinishEvent");
    }

--- a/src/core/hle/service/nvdrv/interface.cpp
+++ b/src/core/hle/service/nvdrv/interface.cpp
@@ -129,7 +129,7 @@ NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name)
    RegisterHandlers(functions);

    auto& kernel = Core::System::GetInstance().Kernel();
-    query_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
+    query_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::OneShot,
                                                         "NVDRV::query_event");
 }

--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -16,7 +16,7 @@ namespace Service::NVFlinger {

 BufferQueue::BufferQueue(u32 id, u64 layer_id) : id(id), layer_id(layer_id) {
    auto& kernel = Core::System::GetInstance().Kernel();
-    buffer_wait_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
+    buffer_wait_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
                                                               "BufferQueue NativeHandle");
 }

--- a/src/core/hle/service/set/set.cpp
+++ b/src/core/hle/service/set/set.cpp
@@ -2,15 +2,16 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include <algorithm>
 #include <chrono>
 #include "common/logging/log.h"
 #include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/client_port.h"
+#include "core/hle/kernel/client_session.h"
 #include "core/hle/service/set/set.h"
 #include "core/settings.h"

 namespace Service::Set {
-namespace {
+
 constexpr std::array<LanguageCode, 17> available_language_codes = {{
    LanguageCode::JA,
    LanguageCode::EN_US,
@@ -31,35 +32,41 @@ constexpr std::array<LanguageCode, 17> available_language_codes = {{
    LanguageCode::ZH_HANT,
 }};

-constexpr std::size_t pre4_0_0_max_entries = 15;
-constexpr std::size_t post4_0_0_max_entries = 17;
+constexpr std::size_t pre4_0_0_max_entries = 0xF;
+constexpr std::size_t post4_0_0_max_entries = 0x40;

 constexpr ResultCode ERR_INVALID_LANGUAGE{ErrorModule::Settings, 625};

-void PushResponseLanguageCode(Kernel::HLERequestContext& ctx, std::size_t num_language_codes) {
-    IPC::ResponseBuilder rb{ctx, 3};
-    rb.Push(RESULT_SUCCESS);
-    rb.Push(static_cast<u32>(num_language_codes));
-}
-
-void GetAvailableLanguageCodesImpl(Kernel::HLERequestContext& ctx, std::size_t max_size) {
-    const std::size_t requested_amount = ctx.GetWriteBufferSize() / sizeof(LanguageCode);
-    const std::size_t copy_amount = std::min(requested_amount, max_size);
-    const std::size_t copy_size = copy_amount * sizeof(LanguageCode);
-
-    ctx.WriteBuffer(available_language_codes.data(), copy_size);
-    PushResponseLanguageCode(ctx, copy_amount);
-}
-} // Anonymous namespace
-
 LanguageCode GetLanguageCodeFromIndex(std::size_t index) {
    return available_language_codes.at(index);
 }

+template <std::size_t size>
+static std::array<LanguageCode, size> MakeLanguageCodeSubset() {
+    std::array<LanguageCode, size> arr;
+    std::copy_n(available_language_codes.begin(), size, arr.begin());
+    return arr;
+}
+
+static void PushResponseLanguageCode(Kernel::HLERequestContext& ctx, std::size_t max_size) {
+    IPC::ResponseBuilder rb{ctx, 3};
+    rb.Push(RESULT_SUCCESS);
+    if (available_language_codes.size() > max_size) {
+        rb.Push(static_cast<u32>(max_size));
+    } else {
+        rb.Push(static_cast<u32>(available_language_codes.size()));
+    }
+}
+
 void SET::GetAvailableLanguageCodes(Kernel::HLERequestContext& ctx) {
    LOG_DEBUG(Service_SET, "called");

-    GetAvailableLanguageCodesImpl(ctx, pre4_0_0_max_entries);
+    if (available_language_codes.size() > pre4_0_0_max_entries) {
+        ctx.WriteBuffer(MakeLanguageCodeSubset<pre4_0_0_max_entries>());
+    } else {
+        ctx.WriteBuffer(available_language_codes);
+    }
+    PushResponseLanguageCode(ctx, pre4_0_0_max_entries);
 }

 void SET::MakeLanguageCode(Kernel::HLERequestContext& ctx) {
@@ -80,7 +87,12 @@ void SET::MakeLanguageCode(Kernel::HLERequestContext& ctx) {
 void SET::GetAvailableLanguageCodes2(Kernel::HLERequestContext& ctx) {
    LOG_DEBUG(Service_SET, "called");

-    GetAvailableLanguageCodesImpl(ctx, post4_0_0_max_entries);
+    if (available_language_codes.size() > post4_0_0_max_entries) {
+        ctx.WriteBuffer(MakeLanguageCodeSubset<post4_0_0_max_entries>());
+    } else {
+        ctx.WriteBuffer(available_language_codes);
+    }
+    PushResponseLanguageCode(ctx, post4_0_0_max_entries);
 }

 void SET::GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx) {
@@ -90,9 +102,9 @@ void SET::GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx) {
 }

 void SET::GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx) {
-    LOG_DEBUG(Service_SET, "called");
-
    PushResponseLanguageCode(ctx, post4_0_0_max_entries);
+
+    LOG_DEBUG(Service_SET, "called");
 }

 void SET::GetLanguageCode(Kernel::HLERequestContext& ctx) {
--- a/src/core/hle/service/vi/display/vi_display.cpp
+++ b/src/core/hle/service/vi/display/vi_display.cpp
@@ -17,7 +17,7 @@ namespace Service::VI {

 Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} {
    auto& kernel = Core::System::GetInstance().Kernel();
-    vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
+    vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
                                                         fmt::format("Display VSync Event {}", id));
 }

--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -556,7 +556,7 @@ private:
            } else {
                // Wait the current thread until a buffer becomes available
                ctx.SleepClientThread(
-                    "IHOSBinderDriver::DequeueBuffer", -1,
+                    Kernel::GetCurrentThread(), "IHOSBinderDriver::DequeueBuffer", -1,
                    [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
                        Kernel::ThreadWakeupReason reason) {
                        // Repeat TransactParcel DequeueBuffer when a buffer is available
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -89,6 +89,7 @@ add_library(video_core STATIC
    shader/decode/conversion.cpp
    shader/decode/memory.cpp
    shader/decode/texture.cpp
+    shader/decode/image.cpp
    shader/decode/float_set_predicate.cpp
    shader/decode/integer_set_predicate.cpp
    shader/decode/half_set_predicate.cpp
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -40,13 +40,6 @@ bool DmaPusher::Step() {
    }

    const CommandList& command_list{dma_pushbuffer.front()};
-    ASSERT_OR_EXECUTE(!command_list.empty(), {
-        // Somehow the command_list is empty, in order to avoid a crash
-        // We ignore it and assume its size is 0.
-        dma_pushbuffer.pop();
-        dma_pushbuffer_subindex = 0;
-        return true;
-    });
    const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
    GPUVAddr dma_get = command_list_header.addr;
    GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -2,8 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include <cstring>
-
 #include "common/assert.h"
 #include "video_core/engines/engine_upload.h"
 #include "video_core/memory_manager.h"
@@ -12,9 +10,7 @@
 namespace Tegra::Engines::Upload {

 State::State(MemoryManager& memory_manager, Registers& regs)
-    : regs{regs}, memory_manager{memory_manager} {}
-
-State::~State() = default;
+    : memory_manager(memory_manager), regs(regs) {}

 void State::ProcessExec(const bool is_linear) {
    write_offset = 0;
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -4,8 +4,10 @@

 #pragma once

+#include <cstddef>
 #include <vector>
 #include "common/bit_field.h"
+#include "common/common_funcs.h"
 #include "common/common_types.h"

 namespace Tegra {
@@ -55,10 +57,10 @@ struct Registers {
 class State {
 public:
    State(MemoryManager& memory_manager, Registers& regs);
-    ~State();
+    ~State() = default;

-    void ProcessExec(bool is_linear);
-    void ProcessData(u32 data, bool is_last_call);
+    void ProcessExec(const bool is_linear);
+    void ProcessData(const u32 data, const bool is_last_call);

 private:
    u32 write_offset = 0;
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -34,9 +34,9 @@ void Maxwell3D::InitializeRegisterDefaults() {

    // Depth range near/far is not always set, but is expected to be the default 0.0f, 1.0f. This is
    // needed for ARMS.
-    for (auto& viewport : regs.viewports) {
-        viewport.depth_range_near = 0.0f;
-        viewport.depth_range_far = 1.0f;
+    for (std::size_t viewport{}; viewport < Regs::NumViewports; ++viewport) {
+        regs.viewports[viewport].depth_range_near = 0.0f;
+        regs.viewports[viewport].depth_range_far = 1.0f;
    }

    // Doom and Bomberman seems to use the uninitialized registers and just enable blend
@@ -47,13 +47,13 @@ void Maxwell3D::InitializeRegisterDefaults() {
    regs.blend.equation_a = Regs::Blend::Equation::Add;
    regs.blend.factor_source_a = Regs::Blend::Factor::One;
    regs.blend.factor_dest_a = Regs::Blend::Factor::Zero;
-    for (auto& blend : regs.independent_blend) {
-        blend.equation_rgb = Regs::Blend::Equation::Add;
-        blend.factor_source_rgb = Regs::Blend::Factor::One;
-        blend.factor_dest_rgb = Regs::Blend::Factor::Zero;
-        blend.equation_a = Regs::Blend::Equation::Add;
-        blend.factor_source_a = Regs::Blend::Factor::One;
-        blend.factor_dest_a = Regs::Blend::Factor::Zero;
+    for (std::size_t blend_index = 0; blend_index < Regs::NumRenderTargets; blend_index++) {
+        regs.independent_blend[blend_index].equation_rgb = Regs::Blend::Equation::Add;
+        regs.independent_blend[blend_index].factor_source_rgb = Regs::Blend::Factor::One;
+        regs.independent_blend[blend_index].factor_dest_rgb = Regs::Blend::Factor::Zero;
+        regs.independent_blend[blend_index].equation_a = Regs::Blend::Equation::Add;
+        regs.independent_blend[blend_index].factor_source_a = Regs::Blend::Factor::One;
+        regs.independent_blend[blend_index].factor_dest_a = Regs::Blend::Factor::Zero;
    }
    regs.stencil_front_op_fail = Regs::StencilOp::Keep;
    regs.stencil_front_op_zfail = Regs::StencilOp::Keep;
@@ -75,11 +75,11 @@ void Maxwell3D::InitializeRegisterDefaults() {

    // TODO(bunnei): Some games do not initialize the color masks (e.g. Sonic Mania). Assuming a
    // default of enabled fixes rendering here.
-    for (auto& color_mask : regs.color_mask) {
-        color_mask.R.Assign(1);
-        color_mask.G.Assign(1);
-        color_mask.B.Assign(1);
-        color_mask.A.Assign(1);
+    for (std::size_t color_mask = 0; color_mask < Regs::NumRenderTargets; color_mask++) {
+        regs.color_mask[color_mask].R.Assign(1);
+        regs.color_mask[color_mask].G.Assign(1);
+        regs.color_mask[color_mask].B.Assign(1);
+        regs.color_mask[color_mask].A.Assign(1);
    }

    // Commercial games seem to assume this value is enabled and nouveau sets this value manually.
@@ -178,13 +178,13 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {

        // Vertex buffer
        if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
-            method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) {
+            method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
        } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
-                   method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) {
+                   method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
        } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
-                   method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) {
+                   method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
            dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
        }
    }
@@ -432,17 +432,13 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
    Texture::TICEntry tic_entry;
    memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));

-    ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
-                   tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
-               "TIC versions other than BlockLinear or Pitch are unimplemented");
-
-    const auto r_type = tic_entry.r_type.Value();
-    const auto g_type = tic_entry.g_type.Value();
-    const auto b_type = tic_entry.b_type.Value();
-    const auto a_type = tic_entry.a_type.Value();
+    const auto r_type{tic_entry.r_type.Value()};
+    const auto g_type{tic_entry.g_type.Value()};
+    const auto b_type{tic_entry.b_type.Value()};
+    const auto a_type{tic_entry.a_type.Value()};

    // TODO(Subv): Different data types for separate components are not supported
-    DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
+    ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);

    return tic_entry;
 }
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -6,7 +6,6 @@

 #include <array>
 #include <bitset>
-#include <type_traits>
 #include <unordered_map>
 #include <vector>

@@ -59,7 +58,6 @@ public:
        static constexpr std::size_t NumCBData = 16;
        static constexpr std::size_t NumVertexArrays = 32;
        static constexpr std::size_t NumVertexAttributes = 32;
-        static constexpr std::size_t NumVaryings = 31;
        static constexpr std::size_t NumTextureSamplers = 32;
        static constexpr std::size_t NumClipDistances = 8;
        static constexpr std::size_t MaxShaderProgram = 6;
@@ -1109,7 +1107,6 @@ public:
    } regs{};

    static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
-    static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");

    struct State {
        struct ConstBufferInfo {
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -98,10 +98,6 @@ union Attribute {
        BitField<22, 2, u64> element;
        BitField<24, 6, Index> index;
        BitField<47, 3, AttributeSize> size;
-
-        bool IsPhysical() const {
-            return element == 0 && static_cast<u64>(index.Value()) == 0;
-        }
    } fmt20;

    union {
@@ -126,6 +122,15 @@ union Sampler {
    u64 value{};
 };

+union Image {
+    Image() = default;
+
+    constexpr explicit Image(u64 value) : value{value} {}
+
+    BitField<36, 13, u64> index;
+    u64 value;
+};
+
 } // namespace Tegra::Shader

 namespace std {
@@ -344,6 +349,26 @@ enum class TextureMiscMode : u64 {
    PTP,
 };

+enum class SurfaceDataMode : u64 {
+    P = 0,
+    D_BA = 1,
+};
+
+enum class OutOfBoundsStore : u64 {
+    Ignore = 0,
+    Clamp = 1,
+    Trap = 2,
+};
+
+enum class ImageType : u64 {
+    Texture1D = 0,
+    TextureBuffer = 1,
+    Texture1DArray = 2,
+    Texture2D = 3,
+    Texture2DArray = 4,
+    Texture3D = 5,
+};
+
 enum class IsberdMode : u64 {
    None = 0,
    Patch = 1,
@@ -398,7 +423,7 @@ enum class LmemLoadCacheManagement : u64 {
    CV = 3,
 };

-enum class LmemStoreCacheManagement : u64 {
+enum class StoreCacheManagement : u64 {
    Default = 0,
    CG = 1,
    CS = 2,
@@ -503,11 +528,6 @@ enum class SystemVariable : u64 {
    CircularQueueEntryAddressHigh = 0x63,
 };

-enum class PhysicalAttributeDirection : u64 {
-    Input = 0,
-    Output = 1,
-};
-
 union Instruction {
    Instruction& operator=(const Instruction& instr) {
        value = instr.value;
@@ -529,11 +549,6 @@ union Instruction {
    BitField<39, 8, Register> gpr39;
    BitField<48, 16, u64> opcode;

-    union {
-        BitField<8, 8, Register> gpr;
-        BitField<20, 24, s64> offset;
-    } gmem;
-
    union {
        BitField<20, 16, u64> imm20_16;
        BitField<20, 19, u64> imm20_19;
@@ -601,7 +616,6 @@ union Instruction {
    } alu;

    union {
-        BitField<38, 1, u64> idx;
        BitField<51, 1, u64> saturate;
        BitField<52, 2, IpaSampleMode> sample_mode;
        BitField<54, 2, IpaInterpMode> interp_mode;
@@ -811,30 +825,21 @@ union Instruction {
    } ld_l;

    union {
-        BitField<44, 2, LmemStoreCacheManagement> cache_management;
+        BitField<44, 2, StoreCacheManagement> cache_management;
    } st_l;

    union {
        BitField<48, 3, UniformType> type;
        BitField<46, 2, u64> cache_mode;
+        BitField<20, 24, s64> immediate_offset;
    } ldg;

    union {
        BitField<48, 3, UniformType> type;
        BitField<46, 2, u64> cache_mode;
+        BitField<20, 24, s64> immediate_offset;
    } stg;

-    union {
-        BitField<32, 1, PhysicalAttributeDirection> direction;
-        BitField<47, 3, AttributeSize> size;
-        BitField<20, 11, u64> address;
-    } al2p;
-
-    union {
-        BitField<53, 3, UniformType> type;
-        BitField<52, 1, u64> extended;
-    } generic;
-
    union {
        BitField<0, 3, u64> pred0;
        BitField<3, 3, u64> pred3;
@@ -1231,6 +1236,20 @@ union Instruction {
        }
    } texs;

+    union {
+        BitField<28, 1, u64> is_array;
+        BitField<29, 2, TextureType> texture_type;
+        BitField<35, 1, u64> aoffi;
+        BitField<49, 1, u64> nodep_flag;
+        BitField<50, 1, u64> ms; // Multisample?
+        BitField<54, 1, u64> cl;
+        BitField<55, 1, u64> process_mode;
+
+        TextureProcessMode GetTextureProcessMode() const {
+            return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL;
+        }
+    } tld;
+
    union {
        BitField<49, 1, u64> nodep_flag;
        BitField<53, 4, u64> texture_info;
@@ -1280,6 +1299,35 @@ union Instruction {
        }
    } tlds;

+    union {
+        BitField<24, 2, StoreCacheManagement> cache_management;
+        BitField<33, 3, ImageType> image_type;
+        BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
+        BitField<51, 1, u64> is_immediate;
+        BitField<52, 1, SurfaceDataMode> mode;
+
+        BitField<20, 3, StoreType> store_data_layout;
+        BitField<20, 4, u64> component_mask_selector;
+
+        bool IsComponentEnabled(std::size_t component) const {
+            ASSERT(mode == SurfaceDataMode::P);
+            constexpr u8 R = 0b0001;
+            constexpr u8 G = 0b0010;
+            constexpr u8 B = 0b0100;
+            constexpr u8 A = 0b1000;
+            constexpr std::array<u8, 16> mask = {
+                0,       (R),         (G),         (R | G),        (B),     (R | B),
+                (G | B), (R | G | B), (A),         (R | A),        (G | A), (R | G | A),
+                (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
+            return std::bitset<4>{mask.at(component_mask_selector)}.test(component);
+        }
+
+        StoreType GetStoreDataLayout() const {
+            ASSERT(mode == SurfaceDataMode::D_BA);
+            return store_data_layout;
+        }
+    } sust;
+
    union {
        BitField<20, 24, u64> target;
        BitField<5, 1, u64> constant_buffer;
@@ -1371,6 +1419,7 @@ union Instruction {

    Attribute attribute;
    Sampler sampler;
+    Image image;

    u64 value;
 };
@@ -1395,24 +1444,23 @@ public:
        LD_L,
        LD_S,
        LD_C,
-        LD,  // Load from generic memory
-        LDG, // Load from global memory
        ST_A,
        ST_L,
        ST_S,
-        ST,   // Store in generic memory
-        STG,  // Store in global memory
-        AL2P, // Transforms attribute memory into physical memory
+        LDG, // Load from global memory
+        STG, // Store in global memory
        TEX,
        TEX_B,  // Texture Load Bindless
        TXQ,    // Texture Query
        TXQ_B,  // Texture Query Bindless
        TEXS,   // Texture Fetch with scalar/non-vec4 source/destinations
+        TLD,    // Texture Load
        TLDS,   // Texture Load with scalar/non-vec4 source/destinations
        TLD4,   // Texture Load 4
        TLD4S,  // Texture Load 4 with scalar / non - vec4 source / destinations
        TMML_B, // Texture Mip Map Level
        TMML,   // Texture Mip Map Level
+        SUST,   // Surface Store
        EXIT,
        IPA,
        OUT_R, // Emit vertex/primitive
@@ -1543,6 +1591,7 @@ public:
        Synch,
        Memory,
        Texture,
+        Image,
        FloatSet,
        FloatSetPredicate,
        IntegerSet,
@@ -1668,24 +1717,23 @@ private:
            INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
            INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
            INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
-            INST("100-------------", Id::LD, Type::Memory, "LD"),
-            INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
            INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
            INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
            INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
-            INST("101-------------", Id::ST, Type::Memory, "ST"),
+            INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
            INST("1110111011011---", Id::STG, Type::Memory, "STG"),
-            INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
            INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
            INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
            INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
            INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"),
            INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
+            INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
            INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
            INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
            INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
            INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
            INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
+            INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
            INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
            INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
            INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -118,7 +118,7 @@ void SynchState::WaitForSynchronization(u64 fence) {
    // Wait for the GPU to be idle (all commands to be executed)
    {
        MICROPROFILE_SCOPE(GPU_wait);
-        std::unique_lock lock{synchronization_mutex};
+        std::unique_lock<std::mutex> lock{synchronization_mutex};
        synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; });
    }
 }
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -81,6 +81,12 @@ struct CommandDataContainer {
    CommandDataContainer(CommandData&& data, u64 next_fence)
        : data{std::move(data)}, fence{next_fence} {}

+    CommandDataContainer& operator=(const CommandDataContainer& t) {
+        data = t.data; // Copy: t is const, so its contents cannot be moved from
+        fence = t.fence;
+        return *this;
+    }
+
    CommandData data;
    u64 fence{};
 };
@@ -103,7 +109,7 @@ struct SynchState final {

    void TrySynchronize() {
        if (IsSynchronized()) {
-            std::lock_guard lock{synchronization_mutex};
+            std::lock_guard<std::mutex> lock{synchronization_mutex};
            synchronization_condition.notify_one();
        }
    }
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -118,12 +118,10 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
                          static_cast<u32>(opcode.operation.Value()));
    }

-    // An instruction with the Exit flag will not actually
-    // cause an exit if it's executed inside a delay slot.
-    // TODO(Blinkhawk): Reversed to always exit. The behavior explained above requires further
-    // testing on the MME code.
    if (opcode.is_exit) {
        // Exit has a delay slot, execute the next instruction
+        // Note: Executing an exit during a branch delay slot will cause the instruction at the
+        // branch target to be executed before exiting.
        Step(offset, true);
        return false;
    }
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -144,9 +144,8 @@ protected:

        object->SetIsRegistered(false);
        rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
-        const CacheAddr addr = object->GetCacheAddr();
        interval_cache.subtract({GetInterval(object), ObjectSet{object}});
-        map_cache.erase(addr);
+        map_cache.erase(object->GetCacheAddr());
    }

    /// Returns a ticks counter used for tracking when cached objects were last modified
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -21,15 +21,11 @@ T GetInteger(GLenum pname) {

 Device::Device() {
    uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
-    max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
-    max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
    has_variable_aoffi = TestVariableAoffi();
 }

 Device::Device(std::nullptr_t) {
    uniform_buffer_alignment = 0;
-    max_vertex_attributes = 16;
-    max_varyings = 15;
    has_variable_aoffi = true;
 }

--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -5,7 +5,6 @@
 #pragma once

 #include <cstddef>
-#include "common/common_types.h"

 namespace OpenGL {

@@ -18,14 +17,6 @@ public:
        return uniform_buffer_alignment;
    }

-    u32 GetMaxVertexAttributes() const {
-        return max_vertex_attributes;
-    }
-
-    u32 GetMaxVaryings() const {
-        return max_varyings;
-    }
-
    bool HasVariableAoffi() const {
        return has_variable_aoffi;
    }
@@ -34,8 +25,6 @@ private:
    static bool TestVariableAoffi();

    std::size_t uniform_buffer_alignment{};
-    u32 max_vertex_attributes{};
-    u32 max_varyings{};
    bool has_variable_aoffi{};
 };

--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -29,8 +29,10 @@
 namespace OpenGL {

 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using PixelFormat = VideoCore::Surface::PixelFormat;
-using SurfaceType = VideoCore::Surface::SurfaceType;
+
+using VideoCore::Surface::PixelFormat;
+using VideoCore::Surface::SurfaceTarget;
+using VideoCore::Surface::SurfaceType;

 MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
 MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
@@ -119,11 +121,6 @@ void RasterizerOpenGL::CheckExtensions() {
            Render_OpenGL,
            "Anisotropic filter is not supported! This can cause graphical issues in some games.");
    }
-    if (!GLAD_GL_ARB_buffer_storage) {
-        LOG_WARNING(
-            Render_OpenGL,
-            "Buffer storage control is not supported! This can cause performance degradation.");
-    }
 }

 GLuint RasterizerOpenGL::SetupVertexFormat() {
@@ -261,8 +258,8 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
            // MakeQuadArray always generates u32 indexes
            params.index_format = GL_UNSIGNED_INT;
            params.count = (regs.vertex_buffer.count / 4) * 6;
-            params.index_buffer_offset = primitive_assembler.MakeQuadArray(
-                regs.vertex_buffer.first, regs.vertex_buffer.count);
+            params.index_buffer_offset =
+                primitive_assembler.MakeQuadArray(regs.vertex_buffer.first, params.count);
        }
        return params;
    }
@@ -323,8 +320,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
                                 static_cast<GLsizeiptr>(sizeof(ubo)));

        Shader shader{shader_cache.GetStageProgram(program)};
-        const auto [program_handle, next_bindings] =
-            shader->GetProgramHandle(primitive_mode, base_bindings);
+
+        const auto stage_enum{static_cast<Maxwell::ShaderStage>(stage)};
+        SetupConstBuffers(stage_enum, shader, base_bindings);
+        SetupGlobalRegions(stage_enum, shader, base_bindings);
+        const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)};
+
+        const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
+        const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant);

        switch (program) {
        case Maxwell::ShaderProgram::VertexA:
@@ -342,11 +345,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
                              shader_config.enable.Value(), shader_config.offset);
        }

-        const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
-        SetupConstBuffers(stage_enum, shader, program_handle, base_bindings);
-        SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings);
-        SetupTextures(stage_enum, shader, program_handle, base_bindings);
-
        // Workaround for Intel drivers.
        // When a clip distance is enabled but not set in the shader it crops parts of the screen
        // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
@@ -809,8 +807,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
 }

 void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
-                                         const Shader& shader, GLuint program_handle,
-                                         BaseBindings base_bindings) {
+                                         const Shader& shader, BaseBindings base_bindings) {
    MICROPROFILE_SCOPE(OpenGL_UBO);
    const auto& gpu = system.GPU();
    const auto& maxwell3d = gpu.Maxwell3D();
@@ -857,8 +854,7 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
 }

 void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
-                                          const Shader& shader, GLenum primitive_mode,
-                                          BaseBindings base_bindings) {
+                                          const Shader& shader, BaseBindings base_bindings) {
    const auto& entries = shader->GetShaderEntries().global_memory_entries;
    for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
        const auto& entry{entries[bindpoint]};
@@ -871,8 +867,8 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade
    }
 }

-void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
-                                     GLuint program_handle, BaseBindings base_bindings) {
+TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
+                                                   BaseBindings base_bindings) {
    MICROPROFILE_SCOPE(OpenGL_Texture);
    const auto& gpu = system.GPU();
    const auto& maxwell3d = gpu.Maxwell3D();
@@ -881,6 +877,8 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
    ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units),
               "Exceeded the number of active textures.");

+    TextureBufferUsage texture_buffer_usage{0};
+
    for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
        const auto& entry = entries[bindpoint];
        Tegra::Texture::FullTextureInfo texture;
@@ -894,18 +892,25 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
        }
        const u32 current_bindpoint = base_bindings.sampler + bindpoint;

-        state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc);
+        auto& unit{state.texture_units[current_bindpoint]};
+        unit.sampler = sampler_cache.GetSampler(texture.tsc);

        if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) {
-            state.texture_units[current_bindpoint].texture =
-                surface->Texture(entry.IsArray()).handle;
+            if (surface->GetSurfaceParams().target == SurfaceTarget::TextureBuffer) {
+                // Record that this texture is a texture buffer.
+                texture_buffer_usage.set(bindpoint);
+            }
+
+            unit.texture = surface->Texture(entry.IsArray()).handle;
            surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
                                   texture.tic.w_source);
        } else {
            // Can occur when texture addr is null or its memory is unmapped/invalid
-            state.texture_units[current_bindpoint].texture = 0;
+            unit.texture = 0;
        }
    }
+
+    return texture_buffer_usage;
 }

 void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
@@ -1135,9 +1140,7 @@ void RasterizerOpenGL::SyncTransformFeedback() {

 void RasterizerOpenGL::SyncPointState() {
    const auto& regs = system.GPU().Maxwell3D().regs;
-    // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
-    // in OpenGL).
-    state.point.size = std::max(1.0f, regs.point_size);
+    state.point.size = regs.point_size;
 }

 void RasterizerOpenGL::SyncPolygonOffset() {
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -106,16 +106,16 @@ private:

    /// Configures the current constbuffers to use for the draw command.
    void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
-                           GLuint program_handle, BaseBindings base_bindings);
+                           BaseBindings base_bindings);

    /// Configures the current global memory entries to use for the draw command.
    void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
-                            const Shader& shader, GLenum primitive_mode,
-                            BaseBindings base_bindings);
+                            const Shader& shader, BaseBindings base_bindings);

-    /// Configures the current textures to use for the draw command.
-    void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
-                       GLuint program_handle, BaseBindings base_bindings);
+    /// Configures the current textures to use for the draw command. Returns the shader's texture
+    /// buffer usage.
+    TextureBufferUsage SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
+                                     const Shader& shader, BaseBindings base_bindings);

    /// Syncs the viewport and depth range to match the guest state
    void SyncViewport(OpenGLState& current_state);
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -140,7 +140,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,

    params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
    params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
-    if (!params.is_tiled) {
+    if (config.tic.IsLineal()) {
        params.pitch = config.tic.Pitch();
    }
    params.unaligned_height = config.tic.Height();
@@ -149,6 +149,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,

    switch (params.target) {
    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::TextureBuffer:
    case SurfaceTarget::Texture2D:
        params.depth = 1;
        break;
@@ -389,6 +390,8 @@ static GLenum SurfaceTargetToGL(SurfaceTarget target) {
    switch (target) {
    case SurfaceTarget::Texture1D:
        return GL_TEXTURE_1D;
+    case SurfaceTarget::TextureBuffer:
+        return GL_TEXTURE_BUFFER;
    case SurfaceTarget::Texture2D:
        return GL_TEXTURE_2D;
    case SurfaceTarget::Texture3D:
@@ -600,29 +603,35 @@ CachedSurface::CachedSurface(const SurfaceParams& params)

    switch (params.target) {
    case SurfaceTarget::Texture1D:
-        glTextureStorage1D(texture.handle, params.max_mip_level, format_tuple.internal_format,
-                           width);
+        glTextureStorage1D(texture.handle, params.max_mip_level, gl_internal_format, width);
+        break;
+    case SurfaceTarget::TextureBuffer:
+        texture_buffer.Create();
+        glNamedBufferStorage(texture_buffer.handle,
+                             params.width * GetBytesPerPixel(params.pixel_format), nullptr,
+                             GL_DYNAMIC_STORAGE_BIT);
+        glTextureBuffer(texture.handle, gl_internal_format, texture_buffer.handle);
        break;
    case SurfaceTarget::Texture2D:
    case SurfaceTarget::TextureCubemap:
-        glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
-                           width, height);
+        glTextureStorage2D(texture.handle, params.max_mip_level, gl_internal_format, width, height);
        break;
    case SurfaceTarget::Texture3D:
    case SurfaceTarget::Texture2DArray:
    case SurfaceTarget::TextureCubeArray:
-        glTextureStorage3D(texture.handle, params.max_mip_level, format_tuple.internal_format,
-                           width, height, params.depth);
+        glTextureStorage3D(texture.handle, params.max_mip_level, gl_internal_format, width, height,
+                           params.depth);
        break;
    default:
        LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                     static_cast<u32>(params.target));
        UNREACHABLE();
-        glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
-                           width, height);
+        glTextureStorage2D(texture.handle, params.max_mip_level, gl_internal_format, width, height);
    }

-    ApplyTextureDefaults(texture.handle, params.max_mip_level);
+    if (params.target != SurfaceTarget::TextureBuffer) {
+        ApplyTextureDefaults(texture.handle, params.max_mip_level);
+    }

    OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
 }
@@ -785,6 +794,13 @@ void CachedSurface::UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_t
            glTextureSubImage1D(texture.handle, mip_map, x0, static_cast<GLsizei>(rect.GetWidth()),
                                tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
            break;
+        case SurfaceTarget::TextureBuffer:
+            ASSERT(mip_map == 0);
+            glNamedBufferSubData(
+                texture_buffer.handle, x0 * GetBytesPerPixel(params.pixel_format), // in bytes
+                static_cast<GLsizeiptr>(rect.GetWidth()) * GetBytesPerPixel(params.pixel_format),
+                &gl_buffer[mip_map][buffer_offset]);
+            break;
        case SurfaceTarget::Texture2D:
            glTextureSubImage2D(texture.handle, mip_map, x0, y0,
                                static_cast<GLsizei>(rect.GetWidth()),
@@ -860,6 +876,9 @@ void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
                                  Tegra::Texture::SwizzleSource swizzle_y,
                                  Tegra::Texture::SwizzleSource swizzle_z,
                                  Tegra::Texture::SwizzleSource swizzle_w) {
+    if (params.target == SurfaceTarget::TextureBuffer) {
+        return;
+    }
    const GLenum new_x = MaxwellToGL::SwizzleSource(swizzle_x);
    const GLenum new_y = MaxwellToGL::SwizzleSource(swizzle_y);
    const GLenum new_z = MaxwellToGL::SwizzleSource(swizzle_z);
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -250,6 +250,8 @@ struct SurfaceParams {
        switch (target) {
        case SurfaceTarget::Texture1D:
            return "1D";
+        case SurfaceTarget::TextureBuffer:
+            return "Buffer";
        case SurfaceTarget::Texture2D:
            return "2D";
        case SurfaceTarget::Texture3D:
@@ -439,6 +441,7 @@ private:

    OGLTexture texture;
    OGLTexture discrepant_view;
+    OGLBuffer texture_buffer;
    SurfaceParams params{};
    GLenum gl_target{};
    GLenum gl_internal_format{};
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -164,8 +164,12 @@ GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgr
 }

 CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
-                               Maxwell::ShaderProgram program_type, BaseBindings base_bindings,
-                               GLenum primitive_mode, bool hint_retrievable = false) {
+                               Maxwell::ShaderProgram program_type, const ProgramVariant& variant,
+                               bool hint_retrievable = false) {
+    auto base_bindings{variant.base_bindings};
+    const auto primitive_mode{variant.primitive_mode};
+    const auto texture_buffer_usage{variant.texture_buffer_usage};
+
    std::string source = "#version 430 core\n";
    source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);

@@ -181,6 +185,18 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
        source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
                              base_bindings.sampler++);
    }
+    for (const auto& image : entries.images) {
+        source +=
+            fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++);
+    }
+
+    // Define SAMPLER_<i>_IS_BUFFER, one macro per line, for each texture buffer sampler.
+    for (std::size_t i = 0; i < texture_buffer_usage.size(); ++i) {
+        if (!texture_buffer_usage.test(i)) {
+            continue;
+        }
+        source += fmt::format("#define SAMPLER_{}_IS_BUFFER\n", i);
+    }

    if (program_type == Maxwell::ShaderProgram::Geometry) {
        const auto [glsl_topology, debug_name, max_vertices] =
@@ -256,20 +272,18 @@ CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
    shader_length = entries.shader_length;
 }

-std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode,
-                                                                BaseBindings base_bindings) {
+std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
    GLuint handle{};
    if (program_type == Maxwell::ShaderProgram::Geometry) {
-        handle = GetGeometryShader(primitive_mode, base_bindings);
+        handle = GetGeometryShader(variant);
    } else {
-        const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings);
+        const auto [entry, is_cache_miss] = programs.try_emplace(variant);
        auto& program = entry->second;
        if (is_cache_miss) {
-            program = TryLoadProgram(primitive_mode, base_bindings);
+            program = TryLoadProgram(variant);
            if (!program) {
-                program =
-                    SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
-                disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
+                program = SpecializeShader(code, entries, program_type, variant);
+                disk_cache.SaveUsage(GetUsage(variant));
            }

            LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
@@ -278,6 +292,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
        handle = program->handle;
    }

+    auto base_bindings{variant.base_bindings};
    base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS;
    base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
    base_bindings.sampler += static_cast<u32>(entries.samplers.size());
@@ -285,43 +300,42 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
    return {handle, base_bindings};
 }

-GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) {
-    const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings);
+GLuint CachedShader::GetGeometryShader(const ProgramVariant& variant) {
+    const auto [entry, is_cache_miss] = geometry_programs.try_emplace(variant);
    auto& programs = entry->second;

-    switch (primitive_mode) {
+    switch (variant.primitive_mode) {
    case GL_POINTS:
-        return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
+        return LazyGeometryProgram(programs.points, variant);
    case GL_LINES:
    case GL_LINE_STRIP:
-        return LazyGeometryProgram(programs.lines, base_bindings, primitive_mode);
+        return LazyGeometryProgram(programs.lines, variant);
    case GL_LINES_ADJACENCY:
    case GL_LINE_STRIP_ADJACENCY:
-        return LazyGeometryProgram(programs.lines_adjacency, base_bindings, primitive_mode);
+        return LazyGeometryProgram(programs.lines_adjacency, variant);
    case GL_TRIANGLES:
    case GL_TRIANGLE_STRIP:
    case GL_TRIANGLE_FAN:
-        return LazyGeometryProgram(programs.triangles, base_bindings, primitive_mode);
+        return LazyGeometryProgram(programs.triangles, variant);
    case GL_TRIANGLES_ADJACENCY:
    case GL_TRIANGLE_STRIP_ADJACENCY:
-        return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, primitive_mode);
+        return LazyGeometryProgram(programs.triangles_adjacency, variant);
    default:
        UNREACHABLE_MSG("Unknown primitive mode.");
-        return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
+        return LazyGeometryProgram(programs.points, variant);
    }
 }

-GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
-                                         GLenum primitive_mode) {
+GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program,
+                                         const ProgramVariant& variant) {
    if (target_program) {
        return target_program->handle;
    }
-    const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(primitive_mode);
-    target_program = TryLoadProgram(primitive_mode, base_bindings);
+    const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(variant.primitive_mode);
+    target_program = TryLoadProgram(variant);
    if (!target_program) {
-        target_program =
-            SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
-        disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
+        target_program = SpecializeShader(code, entries, program_type, variant);
+        disk_cache.SaveUsage(GetUsage(variant));
    }

    LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name);
@@ -329,18 +343,19 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind
    return target_program->handle;
 };

-CachedProgram CachedShader::TryLoadProgram(GLenum primitive_mode,
-                                           BaseBindings base_bindings) const {
-    const auto found = precompiled_programs.find(GetUsage(primitive_mode, base_bindings));
+CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const {
+    const auto found = precompiled_programs.find(GetUsage(variant));
    if (found == precompiled_programs.end()) {
        return {};
    }
    return found->second;
 }

-ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
-                                            BaseBindings base_bindings) const {
-    return {unique_identifier, base_bindings, primitive_mode};
+ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const {
+    ShaderDiskCacheUsage usage;
+    usage.unique_identifier = unique_identifier;
+    usage.variant = variant;
+    return usage;
 }

 ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -394,7 +409,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
        }
        if (!shader) {
            shader = SpecializeShader(unspec.code, unspec.entries, unspec.program_type,
-                                      usage.bindings, usage.primitive, true);
+                                      usage.variant, true);
        }
        precompiled_programs.insert({usage, std::move(shader)});

--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -6,6 +6,7 @@

 #include <array>
 #include <atomic>
+#include <bitset>
 #include <memory>
 #include <set>
 #include <tuple>
@@ -22,7 +23,7 @@

 namespace Core {
 class System;
-} // namespace Core
+}

 namespace OpenGL {

@@ -63,8 +64,7 @@ public:
    }

    /// Gets the GL program handle for the shader
-    std::tuple<GLuint, BaseBindings> GetProgramHandle(GLenum primitive_mode,
-                                                      BaseBindings base_bindings);
+    std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);

 private:
    // Geometry programs. These are needed because GLSL needs an input topology but it's not
@@ -78,15 +78,14 @@ private:
        CachedProgram triangles_adjacency;
    };

-    GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings);
+    GLuint GetGeometryShader(const ProgramVariant& variant);

    /// Generates a geometry shader or returns one that already exists.
-    GLuint LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
-                               GLenum primitive_mode);
+    GLuint LazyGeometryProgram(CachedProgram& target_program, const ProgramVariant& variant);

-    CachedProgram TryLoadProgram(GLenum primitive_mode, BaseBindings base_bindings) const;
+    CachedProgram TryLoadProgram(const ProgramVariant& variant) const;

-    ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
+    ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const;

    u8* host_ptr{};
    VAddr cpu_addr{};
@@ -100,8 +99,8 @@ private:

    std::string code;

-    std::unordered_map<BaseBindings, CachedProgram> programs;
-    std::unordered_map<BaseBindings, GeometryPrograms> geometry_programs;
+    std::unordered_map<ProgramVariant, CachedProgram> programs;
+    std::unordered_map<ProgramVariant, GeometryPrograms> geometry_programs;

    std::unordered_map<u32, GLuint> cbuf_resource_cache;
    std::unordered_map<u32, GLuint> gmem_resource_cache;
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -27,6 +27,7 @@ struct ShaderEntries;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 using ProgramResult = std::pair<std::string, ShaderEntries>;
 using SamplerEntry = VideoCommon::Shader::Sampler;
+using ImageEntry = VideoCommon::Shader::Image;

 class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
 public:
@@ -74,6 +75,7 @@ struct ShaderEntries {
    std::vector<ConstBufferEntry> const_buffers;
    std::vector<SamplerEntry> samplers;
    std::vector<SamplerEntry> bindless_samplers;
+    std::vector<ImageEntry> images;
    std::vector<GlobalMemoryEntry> global_memory_entries;
    std::array<bool, Maxwell::NumClipDistances> clip_distances{};
    std::size_t shader_length{};
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -34,11 +34,11 @@ enum class PrecompiledEntryKind : u32 {
    Dump,
 };

-constexpr u32 NativeVersion = 1;
+constexpr u32 NativeVersion = 3;

 // Making sure sizes doesn't change by accident
-static_assert(sizeof(BaseBindings) == 12);
-static_assert(sizeof(ShaderDiskCacheUsage) == 24);
+static_assert(sizeof(BaseBindings) == 16);
+static_assert(sizeof(ShaderDiskCacheUsage) == 40);

 namespace {

@@ -104,9 +104,8 @@ bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
    return true;
 }

-ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
-
-ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
+ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system)
+    : system{system}, precompiled_cache_virtual_file_offset{0} {}

 std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
 ShaderDiskCacheOpenGL::LoadTransferable() {
@@ -244,7 +243,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
                return {};
            }

-            auto entry = LoadDecompiledEntry();
+            const auto entry = LoadDecompiledEntry();
            if (!entry) {
                return {};
            }
@@ -287,82 +286,97 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
    if (!LoadObjectFromPrecompiled(code_size)) {
        return {};
    }
-
-    std::string code(code_size, '\0');
+    std::vector<u8> code(code_size);
    if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
        return {};
    }

    ShaderDiskCacheDecompiled entry;
-    entry.code = std::move(code);
+    entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size);

    u32 const_buffers_count{};
    if (!LoadObjectFromPrecompiled(const_buffers_count)) {
        return {};
    }
-
    for (u32 i = 0; i < const_buffers_count; ++i) {
        u32 max_offset{};
        u32 index{};
-        bool is_indirect{};
+        u8 is_indirect{};
        if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) ||
            !LoadObjectFromPrecompiled(is_indirect)) {
            return {};
        }
-        entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index);
+        entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index);
    }

    u32 samplers_count{};
    if (!LoadObjectFromPrecompiled(samplers_count)) {
        return {};
    }
-
    for (u32 i = 0; i < samplers_count; ++i) {
        u64 offset{};
        u64 index{};
        u32 type{};
-        bool is_array{};
-        bool is_shadow{};
-        bool is_bindless{};
+        u8 is_array{};
+        u8 is_shadow{};
+        u8 is_bindless{};
        if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
            !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) ||
            !LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) {
            return {};
        }
-        entry.entries.samplers.emplace_back(
+        entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset),
+                                            static_cast<std::size_t>(index),
+                                            static_cast<Tegra::Shader::TextureType>(type),
+                                            is_array != 0, is_shadow != 0, is_bindless != 0);
+    }
+
+    u32 images_count{};
+    if (!LoadObjectFromPrecompiled(images_count)) {
+        return {};
+    }
+    for (u32 i = 0; i < images_count; ++i) {
+        u64 offset{};
+        u64 index{};
+        u32 type{};
+        u8 is_bindless{};
+        if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
+            !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless)) {
+            return {};
+        }
+        entry.entries.images.emplace_back(
            static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
-            static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless);
+            static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0);
    }

    u32 global_memory_count{};
    if (!LoadObjectFromPrecompiled(global_memory_count)) {
        return {};
    }
-
    for (u32 i = 0; i < global_memory_count; ++i) {
        u32 cbuf_index{};
        u32 cbuf_offset{};
-        bool is_read{};
-        bool is_written{};
+        u8 is_read{};
+        u8 is_written{};
        if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) ||
            !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
            return {};
        }
-        entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read,
-                                                         is_written);
+        entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0,
+                                                         is_written != 0);
    }

    for (auto& clip_distance : entry.entries.clip_distances) {
-        if (!LoadObjectFromPrecompiled(clip_distance)) {
+        u8 clip_distance_raw{};
+        if (!LoadObjectFromPrecompiled(clip_distance_raw))
            return {};
-        }
+        clip_distance = clip_distance_raw != 0;
    }

    u64 shader_length{};
    if (!LoadObjectFromPrecompiled(shader_length)) {
        return {};
    }
-
    entry.entries.shader_length = static_cast<std::size_t>(shader_length);

    return entry;
@@ -383,7 +397,7 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
    for (const auto& cbuf : entries.const_buffers) {
        if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) ||
            !SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) ||
-            !SaveObjectToPrecompiled(cbuf.IsIndirect())) {
+            !SaveObjectToPrecompiled(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0))) {
            return false;
        }
    }
@@ -395,9 +409,21 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
        if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) ||
            !SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) ||
            !SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) ||
-            !SaveObjectToPrecompiled(sampler.IsArray()) ||
-            !SaveObjectToPrecompiled(sampler.IsShadow()) ||
-            !SaveObjectToPrecompiled(sampler.IsBindless())) {
+            !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsArray() ? 1 : 0)) ||
+            !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) ||
+            !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsBindless() ? 1 : 0))) {
+            return false;
+        }
+    }
+
+    if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) {
+        return false;
+    }
+    for (const auto& image : entries.images) {
+        if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) ||
+            !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) ||
+            !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) ||
+            !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0))) {
            return false;
        }
    }
@@ -408,13 +434,14 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
    for (const auto& gmem : entries.global_memory_entries) {
        if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) ||
            !SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) ||
-            !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) {
+            !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsRead() ? 1 : 0)) ||
+            !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsWritten() ? 1 : 0))) {
            return false;
        }
    }

    for (const bool clip_distance : entries.clip_distances) {
-        if (!SaveObjectToPrecompiled(clip_distance)) {
+        if (!SaveObjectToPrecompiled(static_cast<u8>(clip_distance ? 1 : 0))) {
            return false;
        }
    }
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -33,14 +33,18 @@ namespace OpenGL {
 using ProgramCode = std::vector<u64>;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;

-/// Allocated bindings used by an OpenGL shader program
+using TextureBufferUsage = std::bitset<64>;
+
+/// Allocated bindings used by an OpenGL shader program.
 struct BaseBindings {
    u32 cbuf{};
    u32 gmem{};
    u32 sampler{};
+    u32 image{};

    bool operator==(const BaseBindings& rhs) const {
-        return std::tie(cbuf, gmem, sampler) == std::tie(rhs.cbuf, rhs.gmem, rhs.sampler);
+        return std::tie(cbuf, gmem, sampler, image) ==
+               std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image);
    }

    bool operator!=(const BaseBindings& rhs) const {
@@ -48,15 +52,29 @@ struct BaseBindings {
    }
 };

-/// Describes how a shader is used
+/// Describes the different variants a single program can be compiled with.
+struct ProgramVariant {
+    BaseBindings base_bindings;
+    GLenum primitive_mode{};
+    TextureBufferUsage texture_buffer_usage{};
+
+    bool operator==(const ProgramVariant& rhs) const {
+        return std::tie(base_bindings, primitive_mode, texture_buffer_usage) ==
+               std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.texture_buffer_usage);
+    }
+
+    bool operator!=(const ProgramVariant& rhs) const {
+        return !operator==(rhs);
+    }
+};
+
+/// Describes how a shader is used.
 struct ShaderDiskCacheUsage {
    u64 unique_identifier{};
-    BaseBindings bindings;
-    GLenum primitive{};
+    ProgramVariant variant;

    bool operator==(const ShaderDiskCacheUsage& rhs) const {
-        return std::tie(unique_identifier, bindings, primitive) ==
-               std::tie(rhs.unique_identifier, rhs.bindings, rhs.primitive);
+        return std::tie(unique_identifier, variant) == std::tie(rhs.unique_identifier, rhs.variant);
    }

    bool operator!=(const ShaderDiskCacheUsage& rhs) const {
@@ -70,16 +88,28 @@ namespace std {

 template <>
 struct hash<OpenGL::BaseBindings> {
-    std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept {
-        return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16;
+    std::size_t operator()(const OpenGL::BaseBindings& bindings) const {
+        return static_cast<std::size_t>(bindings.cbuf) ^
+               (static_cast<std::size_t>(bindings.gmem) << 8) ^
+               (static_cast<std::size_t>(bindings.sampler) << 16) ^
+               (static_cast<std::size_t>(bindings.image) << 24);
+    }
+};
+
+template <>
+struct hash<OpenGL::ProgramVariant> {
+    std::size_t operator()(const OpenGL::ProgramVariant& variant) const {
+        return std::hash<OpenGL::BaseBindings>()(variant.base_bindings) ^
+               std::hash<OpenGL::TextureBufferUsage>()(variant.texture_buffer_usage) ^
+               (static_cast<std::size_t>(variant.primitive_mode) << 6);
    }
 };

 template <>
 struct hash<OpenGL::ShaderDiskCacheUsage> {
-    std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
+    std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const {
        return static_cast<std::size_t>(usage.unique_identifier) ^
-               std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16;
+               std::hash<OpenGL::ProgramVariant>()(usage.variant);
    }
 };

@@ -162,7 +192,6 @@ struct ShaderDiskCacheDump {
 class ShaderDiskCacheOpenGL {
 public:
    explicit ShaderDiskCacheOpenGL(Core::System& system);
-    ~ShaderDiskCacheOpenGL();

    /// Loads transferable cache. If file has a old version or on failure, it deletes the file.
    std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
@@ -260,35 +289,21 @@ private:
        return SaveArrayToPrecompiled(&object, 1);
    }

-    bool SaveObjectToPrecompiled(bool object) {
-        const auto value = static_cast<u8>(object);
-        return SaveArrayToPrecompiled(&value, 1);
-    }
-
    template <typename T>
    bool LoadObjectFromPrecompiled(T& object) {
        return LoadArrayFromPrecompiled(&object, 1);
    }

-    bool LoadObjectFromPrecompiled(bool& object) {
-        u8 value;
-        const bool read_ok = LoadArrayFromPrecompiled(&value, 1);
-        if (!read_ok) {
-            return false;
-        }
-
-        object = value != 0;
-        return true;
-    }
-
-    // Core system
    Core::System& system;
-    // Stored transferable shaders
-    std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
-    // Stores whole precompiled cache which will be read from/saved to the precompiled cache file
+
+    // Stores whole precompiled cache which will be read from or saved to the precompiled cache
+    // file
    FileSys::VectorVfsFile precompiled_cache_virtual_file;
    // Stores the current offset of the precompiled cache file for IO purposes
-    std::size_t precompiled_cache_virtual_file_offset = 0;
+    std::size_t precompiled_cache_virtual_file_offset;
+
+    // Stored transferable shaders
+    std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;

    // The cache has been loaded at boot
    bool tried_to_load{};
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -33,14 +33,14 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
 };

 )";
-    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+    ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
    ProgramResult program =
        Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");

    out += program.first;

    if (setup.IsDualProgram()) {
-        const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
+        ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
        ProgramResult program_b =
            Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");

@@ -76,7 +76,7 @@ void main() {
    }
 })";

-    return {std::move(out), std::move(program.second)};
+    return {out, program.second};
 }

 ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) {
@@ -97,7 +97,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
 };

 )";
-    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+    ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
    ProgramResult program =
        Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
    out += program.first;
@@ -107,7 +107,7 @@ void main() {
    execute_geometry();
 };)";

-    return {std::move(out), std::move(program.second)};
+    return {out, program.second};
 }

 ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) {
@@ -160,7 +160,7 @@ bool AlphaFunc(in float value) {
 }

 )";
-    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+    ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
    ProgramResult program =
        Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");

@@ -172,7 +172,7 @@ void main() {
 }

 )";
-    return {std::move(out), std::move(program.second)};
+    return {out, program.second};
 }

 } // namespace OpenGL::GLShader
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -15,7 +15,8 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",

 namespace OpenGL {

-OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent)
+OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent,
+                                 bool use_persistent)
    : buffer_size(size) {
    gl_buffer.Create();

@@ -29,7 +30,7 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p
        allocate_size *= 2;
    }

-    if (GLAD_GL_ARB_buffer_storage) {
+    if (use_persistent) {
        persistent = true;
        coherent = prefer_coherent;
        const GLbitfield flags =
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -13,7 +13,8 @@ namespace OpenGL {

 class OGLStreamBuffer : private NonCopyable {
 public:
-    explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false);
+    explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false,
+                             bool use_persistent = true);
    ~OGLStreamBuffer();

    GLuint GetHandle() const;
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -126,8 +126,6 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
        return GL_TRIANGLES;
    case Maxwell::PrimitiveTopology::TriangleStrip:
        return GL_TRIANGLE_STRIP;
-    case Maxwell::PrimitiveTopology::TriangleFan:
-        return GL_TRIANGLE_FAN;
    default:
        LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
        UNREACHABLE();
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -472,7 +472,6 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
    }
 }

-/// Initialize the renderer
 bool RendererOpenGL::Init() {
    Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window};

--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -194,8 +194,8 @@ public:
        for (const auto& sampler : ir.GetSamplers()) {
            entries.samplers.emplace_back(sampler);
        }
-        for (const auto& attribute : ir.GetInputAttributes()) {
-            entries.attributes.insert(GetGenericAttributeLocation(attribute));
+        for (const auto& attr : ir.GetInputAttributes()) {
+            entries.attributes.insert(GetGenericAttributeLocation(attr.first));
        }
        entries.clip_distances = ir.GetClipDistances();
        entries.shader_length = ir.GetLength();
@@ -321,7 +321,8 @@ private:
    }

    void DeclareInputAttributes() {
-        for (const auto index : ir.GetInputAttributes()) {
+        for (const auto element : ir.GetInputAttributes()) {
+            const Attribute::Index index = element.first;
            if (!IsGenericAttribute(index)) {
                continue;
            }
@@ -929,6 +930,11 @@ private:
        return {};
    }

+    Id ImageStore(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
    Id Branch(Operation operation) {
        const auto target = std::get_if<ImmediateNode>(operation[0]);
        UNIMPLEMENTED_IF(!target);
@@ -1281,6 +1287,8 @@ private:
        &SPIRVDecompiler::TextureQueryLod,
        &SPIRVDecompiler::TexelFetch,

+        &SPIRVDecompiler::ImageStore,
+
        &SPIRVDecompiler::Branch,
        &SPIRVDecompiler::PushFlowStack,
        &SPIRVDecompiler::PopFlowStack,
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -168,6 +168,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
        {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
        {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
        {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
+        {OpCode::Type::Image, &ShaderIR::DecodeImage},
        {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
        {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
        {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -4,7 +4,6 @@

 #include "common/assert.h"
 #include "common/common_types.h"
-#include "common/logging/log.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/shader_ir.h"

@@ -153,4 +152,4 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -4,7 +4,6 @@

 #include "common/assert.h"
 #include "common/common_types.h"
-#include "common/logging/log.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/shader_ir.h"

--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -4,7 +4,6 @@

 #include "common/assert.h"
 #include "common/common_types.h"
-#include "common/logging/log.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/shader_ir.h"

@@ -48,4 +47,4 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/arithmetic_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_immediate.cpp
@@ -49,4 +49,4 @@ u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -93,4 +93,4 @@ void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation
    }
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/bfe.cpp
+++ b/src/video_core/shader/decode/bfe.cpp
@@ -46,4 +46,4 @@ u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/bfi.cpp
+++ b/src/video_core/shader/decode/bfi.cpp
@@ -38,4 +38,4 @@ u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -56,4 +56,4 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/float_set.cpp
+++ b/src/video_core/shader/decode/float_set.cpp
@@ -55,4 +55,4 @@ u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ b/src/video_core/shader/decode/float_set_predicate.cpp
@@ -53,4 +53,4 @@ u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -6,7 +6,6 @@

 #include "common/assert.h"
 #include "common/common_types.h"
-#include "common/logging/log.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/shader_ir.h"

@@ -65,4 +64,4 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -59,4 +59,4 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -0,0 +1,129 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+namespace {
+/// Returns how many coordinate registers are read to address an image of the given type.
+std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
+    switch (image_type) {
+    case Tegra::Shader::ImageType::Texture1D:
+    case Tegra::Shader::ImageType::TextureBuffer:
+        return 1;
+    case Tegra::Shader::ImageType::Texture1DArray:
+    case Tegra::Shader::ImageType::Texture2D:
+        return 2;
+    case Tegra::Shader::ImageType::Texture2DArray:
+    case Tegra::Shader::ImageType::Texture3D:
+        return 3;
+    }
+    UNREACHABLE();
+    return 1;
+}
+} // Anonymous namespace
+
+/// Decodes the image instruction located at pc and appends the generated nodes to bb.
+/// Only SUST is currently handled; any other opcode is reported as unimplemented.
+u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::SUST: {
+        UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P);
+        UNIMPLEMENTED_IF(instr.sust.image_type == Tegra::Shader::ImageType::TextureBuffer);
+        UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore);
+        UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store
+
+        // The stored value is read from four consecutive registers starting at gpr0.
+        std::vector<Node> values;
+        constexpr std::size_t hardcoded_size{4};
+        for (std::size_t i = 0; i < hardcoded_size; ++i) {
+            values.push_back(GetRegister(instr.gpr0.Value() + i));
+        }
+
+        // Coordinates are read from consecutive registers starting at gpr8.
+        std::vector<Node> coords;
+        const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)};
+        for (std::size_t i = 0; i < num_coords; ++i) {
+            coords.push_back(GetRegister(instr.gpr8.Value() + i));
+        }
+
+        const auto type{instr.sust.image_type};
+        const auto& image{instr.sust.is_immediate ? GetImage(instr.image, type)
+                                                  : GetBindlessImage(instr.gpr39, type)};
+        MetaImage meta{image, values};
+        const Node store{Operation(OperationCode::ImageStore, meta, std::move(coords))};
+        bb.push_back(store);
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName());
+    }
+
+    return pc;
+}
+
+/// Returns the image entry whose offset matches the instruction's image index, registering a
+/// new entry when none exists yet.
+const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
+    const auto offset{static_cast<std::size_t>(image.index.Value())};
+
+    // If this image has already been used, return the existing mapping.
+    const auto itr{std::find_if(used_images.begin(), used_images.end(),
+                                [=](const Image& entry) { return entry.GetOffset() == offset; })};
+    if (itr != used_images.end()) {
+        ASSERT(itr->GetType() == type);
+        return *itr;
+    }
+
+    // Otherwise create a new mapping for this image.
+    const std::size_t next_index{used_images.size()};
+    const Image entry{offset, next_index, type};
+    return *used_images.emplace(entry).first;
+}
+
+/// Tracks reg back to a constant buffer location and returns the image entry keyed by it,
+/// registering a new entry when none exists yet.
+const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg,
+                                        Tegra::Shader::ImageType type) {
+    const Node image_register{GetRegister(reg)};
+    const Node base_image{
+        TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
+
+    // std::get_if yields nullptr when the node does not hold the requested alternative; check
+    // before dereferencing so a failed track trips an assertion instead of reading through null.
+    const auto cbuf{std::get_if<CbufNode>(base_image)};
+    ASSERT(cbuf != nullptr);
+    const auto cbuf_offset_imm{std::get_if<ImmediateNode>(cbuf->GetOffset())};
+    ASSERT(cbuf_offset_imm != nullptr);
+    const auto cbuf_offset{cbuf_offset_imm->GetValue()};
+    const auto cbuf_index{cbuf->GetIndex()};
+    const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
+
+    // If this image has already been used, return the existing mapping.
+    const auto itr{std::find_if(used_images.begin(), used_images.end(),
+                                [=](const Image& entry) { return entry.GetOffset() == cbuf_key; })};
+    if (itr != used_images.end()) {
+        ASSERT(itr->GetType() == type);
+        return *itr;
+    }
+
+    // Otherwise create a new mapping for this image.
+    const std::size_t next_index{used_images.size()};
+    const Image entry{cbuf_index, cbuf_offset, next_index, type};
+    return *used_images.emplace(entry).first;
+}
+
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/integer_set.cpp
+++ b/src/video_core/shader/decode/integer_set.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/shader_ir.h"
@@ -46,4 +47,4 @@ u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ b/src/video_core/shader/decode/integer_set_predicate.cpp
@@ -50,4 +50,4 @@ u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -47,20 +47,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
                             "Indirect attribute loads are not supported");
        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
                             "Unaligned attribute loads are not supported");
-        UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() &&
-                                 instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word,
-                             "Non-32 bits PHYS reads are not implemented");

-        const Node buffer{GetRegister(instr.gpr39)};
+        Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
+                                          Tegra::Shader::IpaSampleMode::Default};

        u64 next_element = instr.attribute.fmt20.element;
        auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());

        const auto LoadNextElement = [&](u32 reg_offset) {
-            const Node attribute{instr.attribute.fmt20.IsPhysical()
-                                     ? GetPhysicalInputAttribute(instr.gpr8, buffer)
-                                     : GetInputAttribute(static_cast<Attribute::Index>(next_index),
-                                                         next_element, buffer)};
+            const Node buffer = GetRegister(instr.gpr39);
+            const Node attribute = GetInputAttribute(static_cast<Attribute::Index>(next_index),
+                                                     next_element, input_mode, buffer);

            SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);

@@ -146,25 +143,12 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        }
        break;
    }
-    case OpCode::Id::LD:
    case OpCode::Id::LDG: {
-        const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
-            switch (opcode->get().GetId()) {
-            case OpCode::Id::LD:
-                UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented");
-                return instr.generic.type;
-            case OpCode::Id::LDG:
-                return instr.ldg.type;
-            default:
-                UNREACHABLE();
-                return {};
-            }
-        }();
-
        const auto [real_address_base, base_address, descriptor] =
-            TrackAndGetGlobalMemory(bb, instr, false);
+            TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
+                                    static_cast<u32>(instr.ldg.immediate_offset.Value()), false);

-        const u32 count = GetUniformTypeElementsCount(type);
+        const u32 count = GetUniformTypeElementsCount(instr.ldg.type);
        for (u32 i = 0; i < count; ++i) {
            const Node it_offset = Immediate(i * 4);
            const Node real_address =
@@ -178,6 +162,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        }
        break;
    }
+    case OpCode::Id::STG: {
+        const auto [real_address_base, base_address, descriptor] =
+            TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
+                                    static_cast<u32>(instr.stg.immediate_offset.Value()), true);
+
+        // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
+        SetTemporal(bb, 0, real_address_base);
+
+        const u32 count = GetUniformTypeElementsCount(instr.stg.type);
+        for (u32 i = 0; i < count; ++i) {
+            SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
+        }
+        for (u32 i = 0; i < count; ++i) {
+            const Node it_offset = Immediate(i * 4);
+            const Node real_address =
+                Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
+            const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
+
+            bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
+        }
+        break;
+    }
    case OpCode::Id::ST_A: {
        UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
                             "Indirect attribute loads are not supported");
@@ -233,56 +239,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        }
        break;
    }
-    case OpCode::Id::ST:
-    case OpCode::Id::STG: {
-        const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
-            switch (opcode->get().GetId()) {
-            case OpCode::Id::ST:
-                UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented");
-                return instr.generic.type;
-            case OpCode::Id::STG:
-                return instr.stg.type;
-            default:
-                UNREACHABLE();
-                return {};
-            }
-        }();
-
-        const auto [real_address_base, base_address, descriptor] =
-            TrackAndGetGlobalMemory(bb, instr, true);
-
-        // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
-        SetTemporal(bb, 0, real_address_base);
-
-        const u32 count = GetUniformTypeElementsCount(type);
-        for (u32 i = 0; i < count; ++i) {
-            SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
-        }
-        for (u32 i = 0; i < count; ++i) {
-            const Node it_offset = Immediate(i * 4);
-            const Node real_address =
-                Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
-            const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
-
-            bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
-        }
-        break;
-    }
-    case OpCode::Id::AL2P: {
-        // Ignore al2p.direction since we don't care about it.
-
-        // Calculate emulation fake physical address.
-        const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))};
-        const Node reg{GetRegister(instr.gpr8)};
-        const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)};
-
-        // Set the fake address to target register.
-        SetRegister(bb, instr.gpr0, fake_address);
-
-        // Signal the shader IR to declare all possible attributes and varyings
-        uses_physical_attributes = true;
-        break;
-    }
    default:
        UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
    }
@@ -291,11 +247,9 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
 }

 std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb,
-                                                                           Instruction instr,
+                                                                           Node addr_register,
+                                                                           u32 immediate_offset,
                                                                           bool is_write) {
-    const auto addr_register{GetRegister(instr.gmem.gpr)};
-    const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
-
    const Node base_address{
        TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))};
    const auto cbuf = std::get_if<CbufNode>(base_address);
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -4,7 +4,6 @@

 #include "common/assert.h"
 #include "common/common_types.h"
-#include "common/logging/log.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/shader_ir.h"

@@ -131,18 +130,15 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
        break;
    }
    case OpCode::Id::IPA: {
-        const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
-
-        const auto attribute = instr.attribute.fmt28;
+        const auto& attribute = instr.attribute.fmt28;
        const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
                                                instr.ipa.sample_mode.Value()};

-        Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
-                                 : GetInputAttribute(attribute.index, attribute.element);
+        const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
+        Node value = attr;
        const Tegra::Shader::Attribute::Index index = attribute.index.Value();
-        const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
-                                index <= Tegra::Shader::Attribute::Index::Attribute_31;
-        if (is_generic || is_physical) {
+        if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
+            index <= Tegra::Shader::Attribute::Index::Attribute_31) {
            // TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
            // In theory by setting them as perspective, OpenGL does the perspective correction.
            // A way must figured to reverse the last step of it.
--- a/src/video_core/shader/decode/predicate_set_predicate.cpp
+++ b/src/video_core/shader/decode/predicate_set_predicate.cpp
@@ -64,4 +64,4 @@ u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ b/src/video_core/shader/decode/predicate_set_register.cpp
@@ -43,4 +43,4 @@ u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ b/src/video_core/shader/decode/register_set_predicate.cpp
@@ -48,4 +48,4 @@ u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -52,4 +52,4 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -244,6 +244,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
        }
        break;
    }
+    case OpCode::Id::TLD: {
+        UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented");
+        UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
+        UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
+
+        if (instr.tld.nodep_flag) {
+            LOG_WARNING(HW_GPU, "TLD.NODEP implementation is incomplete");
+        }
+
+        WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
+        break;
+    }
    case OpCode::Id::TLDS: {
        const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
        const bool is_array{instr.tlds.IsArrayTexture()};
@@ -574,6 +586,38 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
    return values;
 }

+/// Emits one TexelFetch operation per element of the returned Node4 for a TLD instruction.
+Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
+    const auto texture_type{instr.tld.texture_type};
+    const bool is_array{instr.tld.is_array};
+    const bool has_lod{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL};
+    const std::size_t coord_count{GetCoordCount(texture_type)};
+
+    // Starting at gpr8: the array index (array textures only), then the coordinates.
+    u64 coord_reg{instr.gpr8.Value()};
+    Node array_register{};
+    if (is_array) {
+        array_register = GetRegister(coord_reg++);
+    }
+    std::vector<Node> coords;
+    while (coords.size() < coord_count) {
+        coords.push_back(GetRegister(coord_reg++));
+    }
+
+    // gpr20 supplies the level of detail in LL mode; otherwise an immediate zero is used.
+    // Bindless handle, AOFFI and multisample operands are not read from gpr20 yet.
+    const Node lod{has_lod ? GetRegister(instr.gpr20) : Immediate(0u)};
+
+    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+
+    Node4 values;
+    for (u32 element = 0; element < values.size(); ++element) {
+        const MetaTexture meta{sampler, array_register, {}, {}, {}, lod, {}, element};
+        values[element] = Operation(OperationCode::TexelFetch, meta, std::vector<Node>(coords));
+    }
+    return values;
+}
+
 Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
    const std::size_t type_coord_count = GetCoordCount(texture_type);
    const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -108,4 +108,4 @@ Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
    }
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -21,13 +21,6 @@ using Tegra::Shader::PredCondition;
 using Tegra::Shader::PredOperation;
 using Tegra::Shader::Register;

-ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset)
-    : program_code{program_code}, main_offset{main_offset} {
-    Decode();
-}
-
-ShaderIR::~ShaderIR() = default;
-
 Node ShaderIR::StoreNode(NodeData&& node_data) {
    auto store = std::make_unique<NodeData>(node_data);
    const Node node = store.get();
@@ -39,8 +32,8 @@ Node ShaderIR::Conditional(Node condition, std::vector<Node>&& code) {
    return StoreNode(ConditionalNode(condition, std::move(code)));
 }

-Node ShaderIR::Comment(std::string text) {
-    return StoreNode(CommentNode(std::move(text)));
+Node ShaderIR::Comment(const std::string& text) {
+    return StoreNode(CommentNode(text));
 }

 Node ShaderIR::Immediate(u32 value) {
@@ -96,14 +89,13 @@ Node ShaderIR::GetPredicate(bool immediate) {
    return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute));
 }

-Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
-    used_input_attributes.emplace(index);
-    return StoreNode(AbufNode(index, static_cast<u32>(element), buffer));
-}
+/// Creates an attribute read node and records `input_mode` among the modes used by `index`.
+Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element,
+                                 const Tegra::Shader::IpaMode& input_mode, Node buffer) {
+    // operator[] creates the empty mode set the first time an attribute index is seen.
+    used_input_attributes[index].insert(input_mode);

-Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) {
-    uses_physical_attributes = true;
-    return StoreNode(AbufNode(GetRegister(physical_address), buffer));
+    return StoreNode(AbufNode(index, static_cast<u32>(element), input_mode, buffer));
 }

 Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -172,6 +172,8 @@ enum class OperationCode {
    TextureQueryLod,        /// (MetaTexture, float[N] coords) -> float4
    TexelFetch,             /// (MetaTexture, int[N], int) -> float4

+    ImageStore, /// (MetaImage, float[N] coords) -> void
+
    Branch,        /// (uint branch_target) -> void
    PushFlowStack, /// (uint branch_target) -> void
    PopFlowStack,  /// () -> void
@@ -267,6 +269,48 @@ private:
    bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
 };

+/// Image used by a shader; entries are ordered by offset, index, type and bindless flag.
+class Image {
+public:
+    explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
+                   bool is_bindless)
+        : offset{offset}, index{index}, type{type}, is_bindless{is_bindless} {}
+
+    explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type)
+        : Image{offset, index, type, false} {}
+
+    explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
+                   Tegra::Shader::ImageType type)
+        : Image{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset, index, type, true} {}
+
+    std::size_t GetOffset() const {
+        return offset;
+    }
+
+    std::size_t GetIndex() const {
+        return index;
+    }
+
+    Tegra::Shader::ImageType GetType() const {
+        return type;
+    }
+
+    bool IsBindless() const {
+        return is_bindless;
+    }
+
+    bool operator<(const Image& other) const {
+        return std::tie(offset, index, type, is_bindless) <
+               std::tie(other.offset, other.index, other.type, other.is_bindless);
+    }
+
+private:
+    std::size_t offset{}; ///< Image offset; (cbuf_index << 32) | cbuf_offset if built from a cbuf
+    std::size_t index{};  ///< Index assigned to this image by whoever created the entry
+    Tegra::Shader::ImageType type{}; ///< Type of the image
+    bool is_bindless{}; ///< Whether the image was built from a constant buffer index/offset
+};
+
 class ConstBuffer {
 public:
    explicit ConstBuffer(u32 max_offset, bool is_indirect)
@@ -328,31 +372,45 @@ struct MetaTexture {
    u32 element{};
 };

-constexpr MetaArithmetic PRECISE = {true};
-constexpr MetaArithmetic NO_PRECISE = {false};
+struct MetaImage {
+    const Image& image;       ///< Image the operation acts on (held by reference, not owned)
+    std::vector<Node> values; ///< Presumably the values written by ImageStore; confirm in decoder
+};

-using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>;
+inline constexpr MetaArithmetic PRECISE = {true};
+inline constexpr MetaArithmetic NO_PRECISE = {false};
+
+using Meta = std::variant<MetaArithmetic, MetaTexture, MetaImage, Tegra::Shader::HalfType>;

 /// Holds any kind of operation that can be done in the IR
 class OperationNode final {
 public:
-    explicit OperationNode(OperationCode code) : code{code} {}
-
-    explicit OperationNode(OperationCode code, Meta&& meta) : code{code}, meta{std::move(meta)} {}
+    template <typename... T>
+    explicit constexpr OperationNode(OperationCode code) : code{code}, meta{} {}

    template <typename... T>
-    explicit OperationNode(OperationCode code, const T*... operands)
+    explicit constexpr OperationNode(OperationCode code, Meta&& meta)
+        : code{code}, meta{std::move(meta)} {}
+
+    template <typename... T>
+    explicit constexpr OperationNode(OperationCode code, const T*... operands)
        : OperationNode(code, {}, operands...) {}

    template <typename... T>
-    explicit OperationNode(OperationCode code, Meta&& meta, const T*... operands_)
-        : code{code}, meta{std::move(meta)}, operands{operands_...} {}
+    explicit constexpr OperationNode(OperationCode code, Meta&& meta, const T*... operands_)
+        : code{code}, meta{std::move(meta)} {
+
+        auto operands_list = {operands_...};
+        for (auto& operand : operands_list) {
+            operands.push_back(operand);
+        }
+    }

    explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands)
        : code{code}, meta{meta}, operands{std::move(operands)} {}

    explicit OperationNode(OperationCode code, std::vector<Node>&& operands)
-        : code{code}, operands{std::move(operands)} {}
+        : code{code}, meta{}, operands{std::move(operands)} {}

    OperationCode GetCode() const {
        return code;
@@ -456,14 +514,17 @@ private:
 /// Attribute buffer memory (known as attributes or varyings in GLSL terms)
 class AbufNode final {
 public:
-    // Initialize for standard attributes (index is explicit).
+    explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element,
+                                const Tegra::Shader::IpaMode& input_mode, Node buffer = {})
+        : input_mode{input_mode}, buffer{buffer}, index{index}, element{element} {}
+
    explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element,
                                Node buffer = {})
-        : buffer{buffer}, index{index}, element{element} {}
+        : input_mode{}, buffer{buffer}, index{index}, element{element} {}

-    // Initialize for physical attributes (index is a variable value).
-    explicit constexpr AbufNode(Node physical_address, Node buffer = {})
-        : physical_address{physical_address}, buffer{buffer} {}
+    Tegra::Shader::IpaMode GetInputMode() const {
+        return input_mode;
+    }

    Tegra::Shader::Attribute::Index GetIndex() const {
        return index;
@@ -477,19 +538,11 @@ public:
        return buffer;
    }

-    bool IsPhysicalBuffer() const {
-        return physical_address != nullptr;
-    }
-
-    Node GetPhysicalAddress() const {
-        return physical_address;
-    }
-
 private:
-    Node physical_address{};
-    Node buffer{};
-    Tegra::Shader::Attribute::Index index{};
-    u32 element{};
+    const Tegra::Shader::IpaMode input_mode;
+    const Node buffer;
+    const Tegra::Shader::Attribute::Index index;
+    const u32 element;
 };

 /// Constant buffer node, usually mapped to uniform buffers in GLSL
@@ -563,8 +616,11 @@ private:

 class ShaderIR final {
 public:
-    explicit ShaderIR(const ProgramCode& program_code, u32 main_offset);
-    ~ShaderIR();
+    explicit ShaderIR(const ProgramCode& program_code, u32 main_offset)
+        : program_code{program_code}, main_offset{main_offset} {
+
+        Decode();
+    }

    const std::map<u32, NodeBlock>& GetBasicBlocks() const {
        return basic_blocks;
@@ -578,7 +634,8 @@ public:
        return used_predicates;
    }

-    const std::set<Tegra::Shader::Attribute::Index>& GetInputAttributes() const {
+    const std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>&
+    GetInputAttributes() const {
        return used_input_attributes;
    }

@@ -594,6 +651,10 @@ public:
        return used_samplers;
    }

+    const std::set<Image>& GetImages() const {
+        return used_images;
+    }
+
    const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances()
        const {
        return used_clip_distances;
@@ -607,10 +668,6 @@ public:
        return static_cast<std::size_t>(coverage_end * sizeof(u64));
    }

-    bool HasPhysicalAttributes() const {
-        return uses_physical_attributes;
-    }
-
    const Tegra::Shader::Header& GetHeader() const {
        return header;
    }
@@ -644,6 +701,7 @@ private:
    u32 DecodeConversion(NodeBlock& bb, u32 pc);
    u32 DecodeMemory(NodeBlock& bb, u32 pc);
    u32 DecodeTexture(NodeBlock& bb, u32 pc);
+    u32 DecodeImage(NodeBlock& bb, u32 pc);
    u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
    u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
    u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
@@ -663,7 +721,7 @@ private:
    /// Creates a conditional node
    Node Conditional(Node condition, std::vector<Node>&& code);
    /// Creates a commentary
-    Node Comment(std::string text);
+    Node Comment(const std::string& text);
    /// Creates an u32 immediate
    Node Immediate(u32 value);
    /// Creates a s32 immediate
@@ -692,9 +750,8 @@ private:
    /// Generates a predicate node for an immediate true or false value
    Node GetPredicate(bool immediate);
    /// Generates a node representing an input attribute. Keeps track of used attributes.
-    Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {});
-    /// Generates a node representing a physical input attribute.
-    Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {});
+    Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element,
+                           const Tegra::Shader::IpaMode& input_mode, Node buffer = {});
    /// Generates a node representing an output attribute. Keeps track of used attributes.
    Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer);
    /// Generates a node representing an internal flag
@@ -764,6 +821,12 @@ private:
                                      Tegra::Shader::TextureType type, bool is_array,
                                      bool is_shadow);

+    /// Accesses an image.
+    const Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
+
+    /// Access a bindless image sampler.
+    const Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
+
    /// Extracts a sequence of bits from a node
    Node BitfieldExtract(Node value, u32 offset, u32 bits);

@@ -787,6 +850,8 @@ private:
    Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                      bool depth_compare, bool is_array, bool is_aoffi);

+    Node4 GetTldCode(Tegra::Shader::Instruction instr);
+
    Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                      bool is_array);

@@ -811,15 +876,16 @@ private:
    void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
                              Node op_c, Node imm_lut, bool sets_cc);

-    Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
+    Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);

-    std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
+    std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor);

-    std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
-                                       s64 cursor) const;
+    std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);

-    std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(
-        NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write);
+    std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb,
+                                                                     Node addr_register,
+                                                                     u32 immediate_offset,
+                                                                     bool is_write);

    template <typename... T>
    Node Operation(OperationCode code, const T*... operands) {
@@ -831,10 +897,12 @@ private:
        return StoreNode(OperationNode(code, std::move(meta), operands...));
    }

+    template <typename... T>
    Node Operation(OperationCode code, std::vector<Node>&& operands) {
        return StoreNode(OperationNode(code, std::move(operands)));
    }

+    template <typename... T>
    Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) {
        return StoreNode(OperationNode(code, std::move(meta), std::move(operands)));
    }
@@ -866,13 +934,14 @@ private:

    std::set<u32> used_registers;
    std::set<Tegra::Shader::Pred> used_predicates;
-    std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
+    std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>
+        used_input_attributes;
    std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
    std::map<u32, ConstBuffer> used_cbufs;
    std::set<Sampler> used_samplers;
+    std::set<Image> used_images;
    std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
    std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
-    bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes

    Tegra::Shader::Header header;
 };
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -17,24 +17,22 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
    for (; cursor >= 0; --cursor) {
        const Node node = code.at(cursor);
        if (const auto operation = std::get_if<OperationNode>(node)) {
-            if (operation->GetCode() == operation_code) {
+            if (operation->GetCode() == operation_code)
                return {node, cursor};
-            }
        }
        if (const auto conditional = std::get_if<ConditionalNode>(node)) {
            const auto& conditional_code = conditional->GetCode();
            const auto [found, internal_cursor] = FindOperation(
                conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
-            if (found) {
+            if (found)
                return {found, cursor};
-            }
        }
    }
    return {};
 }
 } // namespace

-Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const {
+Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
    if (const auto cbuf = std::get_if<CbufNode>(tracked)) {
        // Cbuf found, but it has to be immediate
        return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr;
@@ -67,7 +65,7 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const
    return nullptr;
 }

-std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
+std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) {
    // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register
    // that it uses as operand
    const auto [found, found_cursor] =
@@ -82,7 +80,7 @@ std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code,
 }

 std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
-                                             s64 cursor) const {
+                                             s64 cursor) {
    for (; cursor >= 0; --cursor) {
        const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
        if (!found_node) {
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -12,6 +12,8 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t
    switch (texture_type) {
    case Tegra::Texture::TextureType::Texture1D:
        return SurfaceTarget::Texture1D;
+    case Tegra::Texture::TextureType::Texture1DBuffer:
+        return SurfaceTarget::TextureBuffer;
    case Tegra::Texture::TextureType::Texture2D:
    case Tegra::Texture::TextureType::Texture2DNoMipmap:
        return SurfaceTarget::Texture2D;
@@ -35,6 +37,7 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t
 bool SurfaceTargetIsLayered(SurfaceTarget target) {
    switch (target) {
    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::TextureBuffer:
    case SurfaceTarget::Texture2D:
    case SurfaceTarget::Texture3D:
        return false;
@@ -53,6 +56,7 @@ bool SurfaceTargetIsLayered(SurfaceTarget target) {
 bool SurfaceTargetIsArray(SurfaceTarget target) {
    switch (target) {
    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::TextureBuffer:
    case SurfaceTarget::Texture2D:
    case SurfaceTarget::Texture3D:
    case SurfaceTarget::TextureCubemap:
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -114,6 +114,7 @@ enum class SurfaceType {

 enum class SurfaceTarget {
    Texture1D,
+    TextureBuffer,
    Texture2D,
    Texture3D,
    Texture1DArray,
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -172,12 +172,16 @@ struct TICEntry {
        BitField<26, 1, u32> use_header_opt_control;
        BitField<27, 1, u32> depth_texture;
        BitField<28, 4, u32> max_mip_level;
+
+        BitField<0, 16, u32> buffer_high_width_minus_one;
    };
    union {
        BitField<0, 16, u32> width_minus_1;
        BitField<22, 1, u32> srgb_conversion;
        BitField<23, 4, TextureType> texture_type;
        BitField<29, 3, u32> border_size;
+
+        BitField<0, 16, u32> buffer_low_width_minus_one;
    };
    union {
        BitField<0, 16, u32> height_minus_1;
@@ -206,7 +210,10 @@ struct TICEntry {
    }

    u32 Width() const {
-        return width_minus_1 + 1;
+        if (header_version != TICHeaderVersion::OneDBuffer) {
+            return width_minus_1 + 1;
+        }  // Buffer headers: the two 16-bit *_minus_one halves also need the + 1.
+        return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1;
    }

    u32 Height() const {
@@ -240,6 +247,15 @@ struct TICEntry {
               header_version == TICHeaderVersion::BlockLinearColorKey;
    }

+    bool IsLineal() const {
+        return header_version == TICHeaderVersion::Pitch ||
+               header_version == TICHeaderVersion::PitchColorKey;
+    }
+
+    bool IsBuffer() const {
+        return header_version == TICHeaderVersion::OneDBuffer;
+    }
+
    bool IsSrgbConversionEnabled() const {
        return srgb_conversion != 0;
    }
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -82,6 +82,8 @@ add_executable(yuzu
    util/limitable_input_dialog.h
    util/sequence_dialog/sequence_dialog.cpp
    util/sequence_dialog/sequence_dialog.h
+    util/spinbox.cpp
+    util/spinbox.h
    util/util.cpp
    util/util.h
    compatdb.cpp
--- a/src/yuzu/about_dialog.cpp
+++ b/src/yuzu/about_dialog.cpp
@@ -9,10 +9,10 @@

 AboutDialog::AboutDialog(QWidget* parent) : QDialog(parent), ui(new Ui::AboutDialog) {
    ui->setupUi(this);
-    ui->labelLogo->setPixmap(QIcon::fromTheme(QStringLiteral("yuzu")).pixmap(200));
-    ui->labelBuildInfo->setText(ui->labelBuildInfo->text().arg(
-        QString::fromUtf8(Common::g_build_fullname), QString::fromUtf8(Common::g_scm_branch),
-        QString::fromUtf8(Common::g_scm_desc), QString::fromUtf8(Common::g_build_date).left(10)));
+    ui->labelLogo->setPixmap(QIcon::fromTheme("yuzu").pixmap(200));
+    ui->labelBuildInfo->setText(
+        ui->labelBuildInfo->text().arg(Common::g_build_fullname, Common::g_scm_branch,
+                                       Common::g_scm_desc, QString(Common::g_build_date).left(10)));
 }

 AboutDialog::~AboutDialog() = default;
--- a/src/yuzu/applets/error.cpp
+++ b/src/yuzu/applets/error.cpp
@@ -54,6 +54,6 @@ void QtErrorDisplay::ShowCustomErrorText(ResultCode error, std::string dialog_te

 void QtErrorDisplay::MainWindowFinishedError() {
    // Acquire the HLE mutex
-    std::lock_guard lock{HLE::g_hle_lock};
+    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
    callback();
 }
--- a/src/yuzu/applets/profile_select.cpp
+++ b/src/yuzu/applets/profile_select.cpp
@@ -84,10 +84,10 @@ QtProfileSelectionDialog::QtProfileSelectionDialog(QWidget* parent)
    tree_view->setContextMenuPolicy(Qt::NoContextMenu);

    item_model->insertColumns(0, 1);
-    item_model->setHeaderData(0, Qt::Horizontal, tr("Users"));
+    item_model->setHeaderData(0, Qt::Horizontal, "Users");

    // We must register all custom types with the Qt Automoc system so that we are able to use it
-    // with signals/slots. In this case, QList falls under the umbrella of custom types.
+    // with signals/slots. In this case, QList falls under the umbrella of custom types.
    qRegisterMetaType<QList<QStandardItem*>>("QList<QStandardItem*>");

    layout->setContentsMargins(0, 0, 0, 0);
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -188,9 +188,7 @@ private:
 GRenderWindow::GRenderWindow(QWidget* parent, EmuThread* emu_thread)
    : QWidget(parent), emu_thread(emu_thread) {
    setWindowTitle(QStringLiteral("yuzu %1 | %2-%3")
-                       .arg(QString::fromUtf8(Common::g_build_name),
-                            QString::fromUtf8(Common::g_scm_branch),
-                            QString::fromUtf8(Common::g_scm_desc)));
+                       .arg(Common::g_build_name, Common::g_scm_branch, Common::g_scm_desc));
    setAttribute(Qt::WA_AcceptTouchEvents);

    InputCommon::Init();
@@ -219,7 +217,7 @@ void GRenderWindow::SwapBuffers() {
    // However:
    // - The Qt debug runtime prints a bogus warning on the console if `makeCurrent` wasn't called
    // since the last time `swapBuffers` was executed;
-    // - On macOS, if `makeCurrent` isn't called explicitly, resizing the buffer breaks.
+    // - On macOS, if `makeCurrent` isn't called explicitly, resizing the buffer breaks.
    context->makeCurrent(child);

    context->swapBuffers(child);
@@ -381,7 +379,6 @@ void GRenderWindow::InitRenderTarget() {
    fmt.setVersion(4, 3);
    if (Settings::values.use_compatibility_profile) {
        fmt.setProfile(QSurfaceFormat::CompatibilityProfile);
-        fmt.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions);
    } else {
        fmt.setProfile(QSurfaceFormat::CoreProfile);
    }
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -9,6 +9,7 @@
 #include <string>
 #include <QVariant>
 #include "core/settings.h"
+#include "yuzu/ui_settings.h"

 class QSettings;

@@ -36,51 +37,19 @@ private:
    void ReadTouchscreenValues();
    void ApplyDefaultProfileIfInputInvalid();

-    // Read functions bases off the respective config section names.
-    void ReadAudioValues();
-    void ReadControlValues();
-    void ReadCoreValues();
-    void ReadDataStorageValues();
-    void ReadDebuggingValues();
-    void ReadDisabledAddOnValues();
-    void ReadMiscellaneousValues();
-    void ReadPathValues();
-    void ReadRendererValues();
-    void ReadShortcutValues();
-    void ReadSystemValues();
-    void ReadUIValues();
-    void ReadUIGamelistValues();
-    void ReadUILayoutValues();
-    void ReadWebServiceValues();
-
    void SaveValues();
    void SavePlayerValues();
    void SaveDebugValues();
    void SaveMouseValues();
    void SaveTouchscreenValues();

-    // Save functions based off the respective config section names.
-    void SaveAudioValues();
-    void SaveControlValues();
-    void SaveCoreValues();
-    void SaveDataStorageValues();
-    void SaveDebuggingValues();
-    void SaveDisabledAddOnValues();
-    void SaveMiscellaneousValues();
-    void SavePathValues();
-    void SaveRendererValues();
-    void SaveShortcutValues();
-    void SaveSystemValues();
-    void SaveUIValues();
-    void SaveUIGamelistValues();
-    void SaveUILayoutValues();
-    void SaveWebServiceValues();
-
    QVariant ReadSetting(const QString& name) const;
    QVariant ReadSetting(const QString& name, const QVariant& default_value) const;
    void WriteSetting(const QString& name, const QVariant& value);
    void WriteSetting(const QString& name, const QVariant& value, const QVariant& default_value);

+    static const std::array<UISettings::Shortcut, 15> default_hotkeys;
+
    std::unique_ptr<QSettings> qt_config;
    std::string qt_config_loc;
 };
--- a/src/yuzu/configuration/configure_audio.cpp
+++ b/src/yuzu/configuration/configure_audio.cpp
@@ -16,21 +16,21 @@ ConfigureAudio::ConfigureAudio(QWidget* parent)
    ui->setupUi(this);

    ui->output_sink_combo_box->clear();
-    ui->output_sink_combo_box->addItem(QString::fromUtf8(AudioCore::auto_device_name));
+    ui->output_sink_combo_box->addItem("auto");
    for (const char* id : AudioCore::GetSinkIDs()) {
-        ui->output_sink_combo_box->addItem(QString::fromUtf8(id));
+        ui->output_sink_combo_box->addItem(id);
    }

    connect(ui->volume_slider, &QSlider::valueChanged, this,
            &ConfigureAudio::setVolumeIndicatorText);

    this->setConfiguration();
-    connect(ui->output_sink_combo_box, qOverload<int>(&QComboBox::currentIndexChanged), this,
+    connect(ui->output_sink_combo_box,
+            static_cast<void (QComboBox::*)(int)>(&QComboBox::currentIndexChanged), this,
            &ConfigureAudio::updateAudioDevices);

-    const bool is_powered_on = Core::System::GetInstance().IsPoweredOn();
-    ui->output_sink_combo_box->setEnabled(!is_powered_on);
-    ui->audio_device_combo_box->setEnabled(!is_powered_on);
+    ui->output_sink_combo_box->setEnabled(!Core::System::GetInstance().IsPoweredOn());
+    ui->audio_device_combo_box->setEnabled(!Core::System::GetInstance().IsPoweredOn());
 }

 ConfigureAudio::~ConfigureAudio() = default;
@@ -94,7 +94,7 @@ void ConfigureAudio::applyConfiguration() {

 void ConfigureAudio::updateAudioDevices(int sink_index) {
    ui->audio_device_combo_box->clear();
-    ui->audio_device_combo_box->addItem(QString::fromUtf8(AudioCore::auto_device_name));
+    ui->audio_device_combo_box->addItem(AudioCore::auto_device_name);

    const std::string sink_id = ui->output_sink_combo_box->itemText(sink_index).toStdString();
    for (const auto& device : AudioCore::GetDeviceListForSink(sink_id)) {
--- a/src/yuzu/configuration/configure_gamelist.cpp
+++ b/src/yuzu/configuration/configure_gamelist.cpp
@@ -100,15 +100,13 @@ void ConfigureGameList::RetranslateUI() {

 void ConfigureGameList::InitializeIconSizeComboBox() {
    for (const auto& size : default_icon_sizes) {
-        ui->icon_size_combobox->addItem(QString::fromUtf8(size.second), size.first);
+        ui->icon_size_combobox->addItem(size.second, size.first);
    }
 }

 void ConfigureGameList::InitializeRowComboBoxes() {
    for (std::size_t i = 0; i < row_text_names.size(); ++i) {
-        const QString row_text_name = QString::fromUtf8(row_text_names[i]);
-
-        ui->row_1_text_combobox->addItem(row_text_name, QVariant::fromValue(i));
-        ui->row_2_text_combobox->addItem(row_text_name, QVariant::fromValue(i));
+        ui->row_1_text_combobox->addItem(row_text_names[i], QVariant::fromValue(i));
+        ui->row_2_text_combobox->addItem(row_text_names[i], QVariant::fromValue(i));
    }
 }
--- a/src/yuzu/configuration/configure_general.cpp
+++ b/src/yuzu/configuration/configure_general.cpp
@@ -14,8 +14,7 @@ ConfigureGeneral::ConfigureGeneral(QWidget* parent)
    ui->setupUi(this);

    for (const auto& theme : UISettings::themes) {
-        ui->theme_combobox->addItem(QString::fromUtf8(theme.first),
-                                    QString::fromUtf8(theme.second));
+        ui->theme_combobox->addItem(theme.first, theme.second);
    }

    this->setConfiguration();
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
ReinUsesLisp	9a8c1745f1	gl_shader_decompiler: Implement image binding settings	2019-05-16 20:03:51 -03:00
ReinUsesLisp	f96d50165f	shader: Implement bindless images	2019-05-16 20:03:51 -03:00
ReinUsesLisp	f9f541470e	shader: Decode SUST and implement backing image functionality	2019-05-16 20:03:51 -03:00
ReinUsesLisp	ce691745dc	gl_rasterizer: Track texture buffer usage	2019-05-16 20:03:51 -03:00
ReinUsesLisp	1d59af8f7c	video_core: Make ARB_buffer_storage a required extension	2019-05-16 20:03:50 -03:00
ReinUsesLisp	a6252257eb	gl_rasterizer_cache: Use texture buffers to emulate texture buffers	2019-05-16 20:03:50 -03:00
ReinUsesLisp	dc5e5ac3b0	maxwell_3d: Partially implement texture buffers as 1D textures	2019-05-16 18:55:20 -03:00
ReinUsesLisp	4f612052b2	gl_shader_decompiler: Allow 1D textures to be texture buffers	2019-05-16 18:55:20 -03:00
ReinUsesLisp	89eef17670	shader: Implement texture buffers	2019-05-16 18:55:20 -03:00