Compare commits

..

131 Commits

Author SHA1 Message Date
Lioncash
f00a54f508 bcat/backend: Make formatting of passphrase consistent in NullBackend::SetPassphrase()
Aligns the '=' to be consistent with the rest of the logs within this
source file.
2020-02-12 01:18:29 -05:00
Lioncash
eefd97e80d bcat/backend: Prevent fmt exception in debug log within NullBackend::Clear()
A formatting specifier within Clear() wasn't being used, which would cause
fmt to throw an exception. This fixes that.
2020-02-12 01:14:47 -05:00
bunnei
37f1cf8cbd Merge pull request #3376 from ReinUsesLisp/point-sprite
gl_rasterizer: Implement GL_POINT_SPRITE
2020-02-11 08:26:07 -05:00
bunnei
84ea9c2b42 Merge pull request #3372 from ReinUsesLisp/fix-back-stencil
maxwell_3d: Fix stencil back mask
2020-02-09 22:29:28 -05:00
Zach Hilman
21c3f48279 Merge pull request #3391 from Morph1984/remove-unknown
Remove option "Show files with type 'Unknown'"
2020-02-09 12:08:01 -05:00
Morph
fcf3425b1b Remove option "Show files with type 'Unknown'" 2020-02-09 11:30:02 -05:00
bunnei
a952fbc5b3 Merge pull request #3388 from bunnei/service-shared-ptr
hle: services: Use std::shared_ptr instead of copy by value.
- This is a prerequisite to adding a mutex to `ServiceFramework`, which cannot be copied.
- This will be used for threaded services.
2020-02-08 21:35:30 -05:00
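For context, a minimal sketch (using a hypothetical Service type, not the real ServiceFramework) of why copy-by-value stops working once a mutex member is added, and how shared_ptr sidesteps it:

```cpp
#include <map>
#include <memory>
#include <mutex>
#include <string>

// Hypothetical stand-in for ServiceFramework: holding a mutex makes the type
// non-copyable, so it can no longer be stored by value in containers.
struct Service {
    std::string name;
    std::mutex lock;  // added for threaded services; deletes the copy constructor
};

int main() {
    std::map<std::string, std::shared_ptr<Service>> services;
    // Sharing via shared_ptr avoids the deleted copy constructor entirely.
    services.emplace("prepo", std::make_shared<Service>());
    auto handle = services.at("prepo");  // copies the pointer, not the service
    std::scoped_lock guard{handle->lock};
}
```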
bunnei
e210835dd0 Merge pull request #3387 from bunnei/gpu-mpscqueue
gpu_thread: Use MPSCQueue for GPU commands.
2020-02-08 21:15:48 -05:00
bunnei
6536cc9741 Merge pull request #3386 from bunnei/gpu-mem-interface
video_core: memory_manager: Use GPU interface for cache functions.
2020-02-08 21:15:27 -05:00
bunnei
7b07e521ca hle: services: Use std::shared_ptr instead of copy by value. 2020-02-07 23:02:26 -05:00
bunnei
b5c13ee0eb gpu_thread: Use MPSCQueue for GPU commands.
- Necessary for multiple service threads.
2020-02-07 23:01:23 -05:00
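A rough sketch of the multi-producer single-consumer pattern this relies on, assuming a simple mutex-based queue rather than yuzu's actual Common::MPSCQueue implementation:

```cpp
#include <condition_variable>
#include <mutex>
#include <queue>
#include <utility>

// Sketch of a multi-producer single-consumer command queue. Any number of
// service threads may Push(); only the GPU thread Pop()s.
template <typename T>
class MpscQueue {
public:
    void Push(T value) {
        {
            std::scoped_lock lock{mutex};
            queue.push(std::move(value));
        }
        cv.notify_one();
    }

    T Pop() {  // blocks until a command is available
        std::unique_lock lock{mutex};
        cv.wait(lock, [this] { return !queue.empty(); });
        T value = std::move(queue.front());
        queue.pop();
        return value;
    }

private:
    std::mutex mutex;
    std::condition_variable cv;
    std::queue<T> queue;
};
```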
bunnei
7cacb08cdf video_core: memory_manager: Use GPU interface for cache functions. 2020-02-07 22:59:35 -05:00
bunnei
90bda66028 Merge pull request #3378 from ReinUsesLisp/uscaled
maxwell_to_gl: Implement R8G8_USCALED
2020-02-07 22:55:52 -05:00
bunnei
90df4b8e2b Merge pull request #3369 from ReinUsesLisp/shf
shader/shift: Implement SHF
2020-02-07 22:06:57 -05:00
bunnei
aa3f9b9606 Merge pull request #3381 from bunnei/ipc-fix
hle: services: Fix prepo IPC, and add better error checking.
2020-02-07 16:25:42 -05:00
bunnei
09d766d357 Merge pull request #3362 from ReinUsesLisp/fix-instanced
gl_rasterizer: Fix instanced draw arrays
2020-02-06 21:39:59 -05:00
bunnei
1b01c3036d Merge pull request #3366 from bunnei/swkbd-fixes
applets: Fixes for software keyboard and transfer memory.
2020-02-05 23:26:32 -05:00
bunnei
ba53543da6 kernel: transfer_memory: Properly reserve and reset memory region. 2020-02-05 23:06:54 -05:00
Zach Hilman
7a547b9342 wait_object: Make wait behavior only require one object to signal.
- This was a holdover from citra.
2020-02-05 23:06:53 -05:00
bunnei
3a0c1e79f8 am: Correct IPC object count mismatch. 2020-02-05 23:06:53 -05:00
bunnei
77da74e17a services: am: Clear events on PopOutData and PopInteractiveOutData. 2020-02-05 23:06:52 -05:00
bunnei
84e895cdd6 am: Refactor IStorage interface. 2020-02-05 23:06:52 -05:00
bunnei
3557fa25d0 applets: software_keyboard: Signal state change on end of interactive session. 2020-02-05 23:06:51 -05:00
bunnei
be5fcffb89 applets: software_keyboard: Minor cleanup. 2020-02-05 23:06:50 -05:00
bunnei
2245c24e21 services: prepo: Fix IPC interface with SaveReport/SaveReportWithUser. 2020-02-05 22:52:35 -05:00
bunnei
9751ccc5e0 hle_ipc: Add error checking to read/write buffer access. 2020-02-05 22:52:35 -05:00
ReinUsesLisp
8bb9eef97b maxwell_to_gl: Implement R8G8_USCALED 2020-02-04 21:32:36 -03:00
ReinUsesLisp
c81c361e82 maxwell_to_gl: Reduce unimplemented formats to LOG_ERROR 2020-02-04 21:32:08 -03:00
bunnei
a0b4be4262 Merge pull request #3377 from ReinUsesLisp/bitset-vk
vk_rasterizer: Use noexcept methods of std::bitset
2020-02-04 16:56:57 -05:00
ReinUsesLisp
0eb36c90f4 vk_rasterizer: Use noexcept variants of std::bitset
Removes bounds checking from "texceptions" instances.
2020-02-04 18:04:24 -03:00
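For reference, std::bitset::test() is bounds-checked and can throw std::out_of_range, while operator[] is not; an illustrative (not verbatim) use of the unchecked variant:

```cpp
#include <bitset>
#include <cstddef>

bool IsTextureExceptional(const std::bitset<32>& texceptions, std::size_t index) {
    // texceptions.test(index) throws std::out_of_range on a bad index.
    // operator[] performs no bounds check, so callers must pass a valid index.
    return texceptions[index];
}
```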
bunnei
08c508b1c4 Merge pull request #3357 from ReinUsesLisp/bfi-rc
shader/bfi: Implement register-constant buffer variant
2020-02-04 15:14:13 -05:00
ReinUsesLisp
7da52673d0 gl_rasterizer: Implement GL_POINT_SPRITE
The OpenGL core profile has point-sprite behaviour enabled by default, while on the
compatibility profile we have to enable GL_POINT_SPRITE explicitly. This fixes
gl_PointCoord's behaviour.
2020-02-04 15:19:45 -03:00
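A sketch of the state change this implies on a compatibility-profile context (assuming a loader that exposes the compatibility enums):

```cpp
#include <glad/glad.h>  // or any other loader that defines compatibility enums

void EnablePointSprites() {
    // Core profile contexts always behave as if GL_POINT_SPRITE were enabled;
    // compatibility contexts need it switched on explicitly so gl_PointCoord
    // is populated for point primitives.
    glEnable(GL_POINT_SPRITE);
}
```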
bunnei
bf21aacc74 Merge pull request #3356 from ReinUsesLisp/fcmp
shader/arithmetic: Implement FCMP
2020-02-04 11:36:59 -05:00
bunnei
5733287822 Merge pull request #3360 from CJBok/statusbar-buttons
GUI: Togglable graphics settings buttons in status bar
2020-02-03 16:57:18 -05:00
bunnei
c31ec00d67 Merge pull request #3337 from ReinUsesLisp/vulkan-staged
yuzu: Implement Vulkan frontend
2020-02-03 16:56:25 -05:00
bunnei
2cd51fc9fd Merge pull request #3374 from lioncash/udp
input_common/udp: Minor changes
2020-02-03 11:41:04 -05:00
Lioncash
c7678c3044 input_common/udp: Ensure that UDP is shut down within Shutdown()
Previously the UDP backend would never actually get shut down.
2020-02-03 09:29:15 -05:00
Lioncash
83f8090273 input_common/udp: Add missing override specifiers
Prevents trivial warnings and ensures interfaces are properly
maintained with respect to the base class.
2020-02-03 09:26:53 -05:00
Lioncash
5c61e0ba39 input_common/udp: std::move SocketCallback instances where applicable
std::function is allowed to heap allocate if the size of the captures
associated with each lambda exceeds a certain threshold. This prevents
potentially unnecessary allocations from occurring.
2020-02-03 09:24:05 -05:00
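An illustrative example of the pattern, with hypothetical names; moving the std::function hands over its captured state instead of copying it:

```cpp
#include <functional>
#include <string>
#include <utility>
#include <vector>

// Hypothetical callback wrapper: the lambda's captured buffer is large enough
// that copying the std::function may force an extra heap allocation.
struct SocketCallback {
    std::function<void(const std::string&)> func;
};

int main() {
    std::vector<SocketCallback> callbacks;

    SocketCallback callback{[buffer = std::string(256, 'x')](const std::string& msg) {
        // ... process msg using buffer ...
    }};

    // std::move transfers the function's internal state instead of copying it.
    callbacks.push_back(std::move(callback));
}
```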
Lioncash
fb9c9ddcc9 input_common/udp: std::move shared_ptr within Client constructor
Gets rid of a trivially avoidable atomic reference count increment and
decrement.
2020-02-03 09:21:46 -05:00
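The general idiom, sketched with placeholder types: take the shared_ptr by value and move it into the member, so the call site pays for at most one reference-count bump:

```cpp
#include <memory>
#include <utility>

struct Socket {};

class Client {
public:
    // Taking the shared_ptr by value and moving it into the member avoids an
    // extra atomic increment/decrement compared to copying it again here.
    explicit Client(std::shared_ptr<Socket> socket) : socket{std::move(socket)} {}

private:
    std::shared_ptr<Socket> socket;
};

int main() {
    auto socket = std::make_shared<Socket>();
    Client client{std::move(socket)};  // the caller can also move to skip its copy
}
```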
Lioncash
9bb6ab77f4 udp/client: Replace deprecated from_string() call with make_address_v4()
Future-proofs code if boost is ever updated.
2020-02-03 09:20:40 -05:00
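The replacement looks roughly like this (illustrative call site, not the exact yuzu code; make_address_v4 requires Boost 1.66 or newer):

```cpp
#include <boost/asio/ip/address_v4.hpp>
#include <iostream>

int main() {
    // Deprecated spelling:
    //   auto addr = boost::asio::ip::address_v4::from_string("127.0.0.1");
    // Current spelling:
    const auto addr = boost::asio::ip::make_address_v4("127.0.0.1");
    std::cout << addr.to_string() << '\n';
}
```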
Lioncash
881408445a input_common/udp: Silence -Wreorder warning for Socket
Amends the constructor initializer list to specify the order of its
elements in the same order that initialization would occur.
2020-02-03 09:15:32 -05:00
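For context, members are always initialized in declaration order regardless of how the initializer list is written, and -Wreorder flags the mismatch; a minimal example of the corrected form:

```cpp
// Members are initialized in declaration order (port, then timeout), no matter
// how the initializer list is written. Listing them out of order makes
// -Wreorder fire, so the fix is to write the list in declaration order.
class Socket {
public:
    explicit Socket(int timeout_) : port{1234}, timeout{timeout_} {}

private:
    int port;
    int timeout;
};
```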
Lioncash
36524465a6 input_common/udp: Remove unnecessary inclusions 2020-02-03 09:13:40 -05:00
Lioncash
4aa9c9632d input_common/udp: Add missing header guard 2020-02-03 09:09:06 -05:00
bunnei
157eb375a5 Merge pull request #3370 from ReinUsesLisp/node-shared-ptr
shader: Remove curly braces initializers on shared pointers
2020-02-03 00:25:56 -05:00
ReinUsesLisp
4eed744277 maxwell_3d: Fix stencil back mask 2020-02-02 17:50:46 -03:00
ReinUsesLisp
223a89a19f shader: Remove curly braces initializers on shared pointers 2020-02-01 22:52:10 -03:00
bunnei
b5bbe7e752 Merge pull request #3282 from FernandoS27/indexed-samplers
Partially implement Indexed samplers in general and specific code in GLSL
2020-02-01 20:41:40 -05:00
ReinUsesLisp
729ca120e3 shader/shift: Implement SHF_RIGHT_{IMM,R}
Shifts a pair of registers to the right and returns the low register.
2020-02-01 21:20:02 -03:00
ReinUsesLisp
017474c3f8 shader/shift: Implement SHF_LEFT_{IMM,R}
Shifts a pair of registers to the left and returns the high register.
2020-02-01 21:19:44 -03:00
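Both SHF commits above implement funnel shifts over a 64-bit value formed from a register pair; a rough scalar model (assuming 32-bit registers and an in-range shift amount):

```cpp
#include <cstdint>

// Treat {high, low} as one 64-bit value. SHF_LEFT shifts it left and keeps the
// high 32 bits; SHF_RIGHT shifts it right and keeps the low 32 bits.
std::uint32_t ShfLeft(std::uint32_t high, std::uint32_t low, unsigned shift) {
    const std::uint64_t pair = (std::uint64_t{high} << 32) | low;
    return static_cast<std::uint32_t>((pair << shift) >> 32);
}

std::uint32_t ShfRight(std::uint32_t high, std::uint32_t low, unsigned shift) {
    const std::uint64_t pair = (std::uint64_t{high} << 32) | low;
    return static_cast<std::uint32_t>(pair >> shift);
}
```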
bunnei
2916c1bc25 Merge pull request #3268 from CJBok/deadzone
GUI: Deadzone controls for sdl engine at configuration input
2020-02-01 16:35:15 -05:00
bunnei
69a6796de1 Merge pull request #3284 from CJBok/hid-fix
hid: Fix analog sticks directional states
2020-02-01 14:02:41 -05:00
bunnei
c18f9898d9 Merge pull request #3364 from lioncash/thread
core/arm: Remove usage of global GetCurrentThread()
2020-01-31 11:13:24 -05:00
bunnei
6b5b01b29f Merge pull request #3363 from lioncash/unique_ptr
kernel/physical_core: Make use of std::unique_ptr instead of std::shared_ptr
2020-01-30 23:33:02 -05:00
bunnei
1948fc0858 Merge pull request #3365 from yuzu-emu/revert-3151-fix-korean
Revert "system_archive: Fix Korean and Chinese fonts"
2020-01-30 22:03:47 -05:00
bunnei
91b0a3f799 Revert "system_archive: Fix Korean and Chinese fonts" 2020-01-30 22:02:15 -05:00
Lioncash
472319e573 core/arm: Remove usage of global GetCurrentThread()
Now both CPU backends go through their referenced system instance to
obtain the current thread.
2020-01-30 18:52:25 -05:00
Lioncash
2de2bb980e kernel/physical_core: Make use of std::unique_ptr
shared_ptr was used in 2d1984c20c due to a
misunderstanding of how the language generates move constructors and
move assignment operators.

If a destructor is user-provided, then the compiler won't generate the
move constructor and move assignment operators by default--they must be
explicitly opted into.

The reason for the compilation errors is due to the fact that the
language will fall back to attempting to use the copy constructor/copy
assignment operators if the respective move constructor or move
assignment operator is unavailable.

Given that we explicitly opt into them now, the move constructor and
move assignment operators will be generated as expected.
2020-01-30 18:42:40 -05:00
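A compact illustration of the rule described above, using hypothetical declarations:

```cpp
#include <memory>
#include <utility>

class PhysicalCore {
public:
    PhysicalCore() = default;
    ~PhysicalCore();  // user-provided destructor suppresses the implicit moves

    // Without these two lines the class falls back to its (deleted) copy
    // operations, so moving an instance would fail to compile; explicitly
    // defaulting them restores the generated move operations.
    PhysicalCore(PhysicalCore&&) = default;
    PhysicalCore& operator=(PhysicalCore&&) = default;

private:
    std::unique_ptr<int> state = std::make_unique<int>(0);
};

PhysicalCore::~PhysicalCore() = default;

int main() {
    PhysicalCore a;
    PhysicalCore b = std::move(a);  // OK: uses the defaulted move constructor
    a = std::move(b);               // OK: uses the defaulted move assignment
}
```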
Lioncash
16e7b7b83d core/cpu_manager: Remove unused includes
Nothing from these headers is used within this source file, so we can
remove them.
2020-01-30 18:30:57 -05:00
Lioncash
51927bc9dc kernel/physical_core: Remove unused kernel reference member variable
This isn't used within the class, so it can be removed to simplify the
overall interface.

While we're in the same area, we can simplify a unique_ptr reset() call.
2020-01-30 18:29:57 -05:00
bunnei
985d0f35e5 Merge pull request #3353 from FernandoS27/aries
System: Refactor CPU Core management and move ARMInterface and Schedulers to Kernel
2020-01-30 18:13:59 -05:00
bunnei
8a7cdfc3ff Merge pull request #3151 from FearlessTobi/fix-korean
system_archive: Fix Korean and Chinese fonts
2020-01-30 15:09:55 -05:00
bunnei
c593e45dbd Merge pull request #3347 from ReinUsesLisp/local-mem
shader/memory: Implement LDL.S16, LDS.S16, STL.S16 and STS.S16
2020-01-30 10:59:52 -05:00
ReinUsesLisp
b69321650e gl_rasterizer: Fix instanced draw arrays
glDrawArrays was being used when the draw had a base instance specified.
This commit removes the draw parameters abstraction and fixes the
mentioned issue.
2020-01-30 02:22:00 -03:00
ReinUsesLisp
d027850f33 ci: Disable Vulkan for Windows MinGW builds 2020-01-29 19:44:00 -03:00
ReinUsesLisp
a7beabb68f yuzu/bootmanager: Define Vulkan widget only when enabled 2020-01-29 19:20:12 -03:00
bunnei
2db7adc42a Merge pull request #3350 from ReinUsesLisp/atom
shader/memory: Implement ATOM.ADD
2020-01-29 16:49:54 -05:00
ReinUsesLisp
252415a163 ci: Disable Vulkan for Linux builds 2020-01-29 18:06:16 -03:00
ReinUsesLisp
c29584a090 yuzu_cmd: Fix memcpy on Vulkan handlers 2020-01-29 17:53:11 -03:00
ReinUsesLisp
f92cbc5501 yuzu: Implement Vulkan frontend
Adds a Qt and SDL2 frontend for Vulkan. It also finishes the missing
bits on Vulkan initialization.
2020-01-29 17:53:11 -03:00
ReinUsesLisp
8299f1ceef web_service/telemetry_json: Report USER_CONFIG 2020-01-29 17:53:11 -03:00
ReinUsesLisp
788d57d723 settings: Add settings for graphics backend 2020-01-29 17:53:11 -03:00
ReinUsesLisp
e651e54b85 core: Only wait for idle on gpu_core when it was initialized
This fixes crashes when a Vulkan device fails to initialize.
2020-01-29 17:53:11 -03:00
ReinUsesLisp
9f0162e4b5 shader/other: Fix skips for SYNC and BRK 2020-01-29 17:53:11 -03:00
ReinUsesLisp
270177f38a shader/other: Stub S2R LaneId 2020-01-29 17:53:11 -03:00
ReinUsesLisp
b35449c85d buffer_cache: Delay buffer destructions
Delay buffer destruction by some extra frames to avoid destroying buffers
that are still being used by older frames. This happens on Nvidia's driver
with the mailbox present mode.
2020-01-29 17:53:11 -03:00
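A sketch of the deferral idea (not the cache's real data structures): retired buffers are parked along with the frame they were retired in and only destroyed a few frames later:

```cpp
#include <cstdint>
#include <deque>
#include <memory>

struct Buffer {};  // stand-in for a GPU buffer handle

class DelayedDeleter {
public:
    // Keep retired buffers alive for this many frames so in-flight GPU work
    // from older frames never references a destroyed buffer.
    static constexpr std::uint64_t kDelayFrames = 4;

    void Retire(std::unique_ptr<Buffer> buffer) {
        pending.push_back({frame, std::move(buffer)});
    }

    void Tick() {  // called once per frame
        ++frame;
        while (!pending.empty() && frame - pending.front().retired_frame > kDelayFrames) {
            pending.pop_front();  // actually destroys the buffer
        }
    }

private:
    struct Entry {
        std::uint64_t retired_frame;
        std::unique_ptr<Buffer> buffer;
    };
    std::uint64_t frame = 0;
    std::deque<Entry> pending;
};
```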
bunnei
b11aeced18 Merge pull request #3355 from ReinUsesLisp/break-down
texture_cache/surface_base: Fix layered break down
2020-01-29 12:29:56 -05:00
bunnei
91f79225e7 Merge pull request #3358 from ReinUsesLisp/implicit-texture-cache
gl_texture_cache: Silence implicit sign cast warnings
2020-01-29 11:23:50 -05:00
CJBok
8d6b4e836c clang 2020-01-29 05:43:55 +01:00
CJBok
6e87111f91 minor corrections 2020-01-29 00:02:28 +01:00
CJBok
4bc4fdf5ff GUI: Togglable graphics settings buttons in status bar 2020-01-28 23:59:30 +01:00
bunnei
c457e47297 Merge pull request #3359 from ReinUsesLisp/assert-point-size
gl_shader_decompiler: Remove UNIMPLEMENTED for gl_PointSize
2020-01-28 15:19:51 -05:00
ReinUsesLisp
8178fe8960 gl_shader_decompiler: Remove UNIMPLEMENTED for gl_PointSize
This was implemented by a previous commit and it's no longer required.
2020-01-28 16:32:30 -03:00
bunnei
283f3253bc Merge pull request #3352 from Simek/dark-theme-refinements
GUI: dark themes refinements and QSS cleanup
2020-01-28 14:05:36 -05:00
bunnei
bea6327d74 Merge pull request #3354 from ReinUsesLisp/depth-stencil
gl_texture_cache: Properly implement depth/stencil sampling
2020-01-28 12:06:11 -05:00
ReinUsesLisp
abae795986 gl_texture_cache: Silence implicit sign cast warnings 2020-01-27 20:59:11 -03:00
bunnei
acfb0b4852 Merge pull request #3346 from bunnei/bsd-stub
bsd: Stub several more functions.
2020-01-27 13:06:05 -05:00
Fernando Sahmkow
2d1984c20c System: Address Feedback 2020-01-27 09:54:11 -04:00
ReinUsesLisp
137a8aa55c shader/bfi: Implement register-constant buffer variant
It's the same as the variant that was implemented, but it takes the
operands from another source.
2020-01-27 01:20:38 -03:00
ReinUsesLisp
e3fc3459c8 shader/arithmetic: Implement FCMP
Compares the third operand with zero, then selects between the first and
second.
2020-01-27 01:15:44 -03:00
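In scalar terms, a sketch of the selection FCMP performs (the comparison operator is encoded in the instruction; '>' is used here only as an example):

```cpp
// FCMP roughly behaves like: result = (c OP 0.0) ? a : b
float Fcmp(float a, float b, float c) {
    return (c > 0.0f) ? a : b;
}
```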
ReinUsesLisp
f55f6ff9bb texture_cache/surface_base: Fix layered break down
Layered break downs was passing "layer" as a "depth" parameter. This
commit addresses that.
2020-01-26 21:48:07 -03:00
ReinUsesLisp
d17dfa6104 gl_texture_cache: Properly implement depth/stencil sampling
This addresses the long-standing issue of compatibility vs. core
profiles on OpenGL, properly implementing depth vs. stencil sampling
depending on the texture swizzle.
2020-01-26 21:44:08 -03:00
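On the OpenGL side this maps onto GL_DEPTH_STENCIL_TEXTURE_MODE (core since 4.3); a sketch of choosing which component a combined depth/stencil texture returns, based on the guest swizzle:

```cpp
#include <glad/glad.h>

// For a GL_DEPTH24_STENCIL8 texture, choose whether sampling returns the depth
// or the stencil component, mirroring what the guest texture swizzle requests.
void SetDepthStencilMode(GLuint texture, bool sample_stencil) {
    glTextureParameteri(texture, GL_DEPTH_STENCIL_TEXTURE_MODE,
                        sample_stencil ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT);
}
```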
Fernando Sahmkow
de4b01f75d System: Correct PrepareReschedule. 2020-01-26 14:32:50 -04:00
Fernando Sahmkow
a1630ab53e Kernel: Remove a few global instances from the kernel. 2020-01-26 14:23:46 -04:00
Fernando Sahmkow
e4a1ead897 Core: Refactor CpuCoreManager to CpuManager and Cpu to Core Manager.
This commit intends to better reflect the new purpose of these classes in their names.
2020-01-26 14:07:22 -04:00
Fernando Sahmkow
450341b397 ArmInterface: Delegate Exclusive monitor factory to exclusive monitor interface. 2020-01-26 10:28:23 -04:00
ReinUsesLisp
d95d4ac843 shader/memory: Implement ATOM.ADD
ATOM operates atomically on global memory. For now, only ATOM.ADD is
implemented, since that's what was found in commercial games.

This asserts for ATOM.ADD.S32 (handling the others as unimplemented),
although ATOM.ADD.U32 shouldn't be any different.

This change forces us to change the default type on SPIR-V storage
buffers from float to uint. We could also alias the buffers, but it's
simpler for now to just use uint. While we are at it, abstract the code
to avoid repetition.
2020-01-26 01:54:24 -03:00
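A scalar model of what ATOM.ADD.U32 does per shader invocation (a sketch, not the decompiler's output):

```cpp
#include <atomic>
#include <cstdint>

// ATOM.ADD.U32: atomically add `value` to a 32-bit word of global (storage
// buffer) memory and return the previous contents, which is the instruction's result.
std::uint32_t AtomAddU32(std::atomic<std::uint32_t>& word, std::uint32_t value) {
    return word.fetch_add(value);
}

int main() {
    std::atomic<std::uint32_t> counter{0};
    const std::uint32_t old = AtomAddU32(counter, 5);  // old == 0, counter == 5
    return static_cast<int>(old);
}
```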
Fernando Sahmkow
4d6a86b03f Core: Refactor CPU Management.
This commit moves ARM Interface and Scheduler handling into the kernel.
2020-01-25 18:55:32 -04:00
Fernando Sahmkow
bb8eb15d39 Shader_IR: Address feedback. 2020-01-25 09:04:59 -04:00
ReinUsesLisp
d26e74f0a3 shader/memory: Implement STL.S16 and STS.S16 2020-01-25 03:16:10 -03:00
ReinUsesLisp
9a2cdf8520 shader/memory: Implement unaligned LDL.S16 and LDS.S16 2020-01-25 03:16:10 -03:00
ReinUsesLisp
531f25a037 shader/memory: Move unaligned load/store to functions 2020-01-25 03:16:10 -03:00
ReinUsesLisp
96638f57c9 shader/memory: Implement LDL.S16 and LDS.S16 2020-01-25 03:15:55 -03:00
bunnei
2a822f3378 bsd: Stub several more functions.
- Required for Little Town Hero to boot further.
2020-01-25 00:47:15 -05:00
FearlessTobi
845a5dbca9 Disable clang-format for font files 2020-01-24 23:54:19 +01:00
Fernando Sahmkow
806f569143 Shader_IR: Change the name of the TrackSampler function so it is not confused with the type. 2020-01-24 16:44:48 -04:00
Fernando Sahmkow
3919b7b8a9 Shader_IR: Corrections, styling and extras. 2020-01-24 16:44:48 -04:00
Fernando Sahmkow
37b8504faa Shader_IR: Correct Custom Variable assignment. 2020-01-24 16:44:47 -04:00
Fernando Sahmkow
7c530e0666 Shader_IR: Propagate bindless index into the GL compiler. 2020-01-24 16:44:47 -04:00
Fernando Sahmkow
3c34678627 Shader_IR: Implement Injectable Custom Variables to the IR. 2020-01-24 16:43:31 -04:00
Fernando Sahmkow
2b02f29a2d GL Backend: Introduce indexed samplers into the GL backend 2020-01-24 16:43:31 -04:00
Fernando Sahmkow
037ea431ce Shader_IR: deduce size of indexed samplers 2020-01-24 16:43:31 -04:00
Fernando Sahmkow
f4603d23c5 Shader_IR: Setup Indexed Samplers on the IR 2020-01-24 16:43:30 -04:00
Fernando Sahmkow
603c861532 Shader_IR: Implement initial code for tracking indexed samplers. 2020-01-24 16:43:30 -04:00
Fernando Sahmkow
64496f2456 Shader_IR: Address Feedback 2020-01-24 16:43:30 -04:00
Fernando Sahmkow
b97608ca64 Shader_IR: Allow constant access of guest driver. 2020-01-24 16:43:30 -04:00
Fernando Sahmkow
dc5cfa8d28 Shader_IR: Address Feedback 2020-01-24 16:43:29 -04:00
Fernando Sahmkow
74aa7de5e3 Guest_driver: Correct compiling errors in GCC. 2020-01-24 16:43:29 -04:00
Fernando Sahmkow
1e4b6bef6f Shader_IR: Store Bound buffer on Shader Usage 2020-01-24 16:43:29 -04:00
Fernando Sahmkow
c921e496eb GPU: Implement guest driver profile and deduce texture handler sizes. 2020-01-24 16:43:29 -04:00
Fernando Sahmkow
ab89ced244 Kernel: Implement Physical Core. 2020-01-24 15:38:20 -04:00
FearlessTobi
4e9331f45d system_archive: Fix Chinese font
Adds the proper OSS font for the Chinese language.
2020-01-19 15:09:53 +01:00
FearlessTobi
999e3f89b9 system_archive: Fix Korean font
Fixes Korean fonts when using Open-source system archives.
2020-01-19 15:09:50 +01:00
CJBok
635deb70d4 Moved analog direction logic to sdl_impl 2020-01-15 11:25:15 +01:00
CJBok
231d9c10f3 Corrected directional states sensitivity 2020-01-14 21:51:58 +01:00
CJBok
83be9fc96d Merge remote-tracking branch 'upstream/master' 2020-01-12 23:21:30 +01:00
CJBok
ae7fd01e38 hid: Fix analog sticks directional states 2020-01-09 02:40:55 +01:00
CJBok
2fa9a96309 const correction 2020-01-03 10:30:51 +01:00
CJBok
90f9c830ca clang 2020-01-03 09:31:54 +01:00
CJBok
351e3fb72e Update configure_input_player.cpp 2020-01-03 09:11:34 +01:00
CJBok
4a566b9828 Added deadzone controls for sdl engine at input settings 2020-01-03 08:54:57 +01:00
141 changed files with 3426 additions and 1248 deletions

View File

@@ -5,7 +5,7 @@ cd /yuzu
ccache -s
mkdir build || true && cd build
cmake .. -G Ninja -DDISPLAY_VERSION=$1 -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON
cmake .. -G Ninja -DDISPLAY_VERSION=$1 -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON -DENABLE_VULKAN=No
ninja

View File

@@ -13,7 +13,7 @@ echo '' >> /bin/cmd
chmod +x /bin/cmd
mkdir build || true && cd build
cmake .. -G Ninja -DDISPLAY_VERSION=$1 -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release
cmake .. -G Ninja -DDISPLAY_VERSION=$1 -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_VULKAN=No
ninja
# Clean up the dirty hacks

View File

@@ -10,6 +10,6 @@
<file alias="256x256/plus_folder.png">icons/256x256/plus_folder.png</file>
</qresource>
<qresource prefix="colorful">
<file>style.qss</file>
<file alias="style.qss">../default/style.qss</file>
</qresource>
</RCC>

View File

@@ -1,4 +0,0 @@
/*
This file is intentionally left blank.
We do not want to apply any stylesheet for colorful, only icons.
*/

View File

@@ -1,25 +1,18 @@
<RCC>
<qresource prefix="icons/default">
<file alias="index.theme">icons/index.theme</file>
<file alias="16x16/checked.png">icons/16x16/checked.png</file>
<file alias="16x16/failed.png">icons/16x16/failed.png</file>
<file alias="16x16/lock.png">icons/16x16/lock.png</file>
<file alias="48x48/bad_folder.png">icons/48x48/bad_folder.png</file>
<file alias="48x48/chip.png">icons/48x48/chip.png</file>
<file alias="48x48/folder.png">icons/48x48/folder.png</file>
<file alias="48x48/plus.png">icons/48x48/plus.png</file>
<file alias="48x48/sd_card.png">icons/48x48/sd_card.png</file>
<file alias="256x256/yuzu.png">icons/256x256/yuzu.png</file>
<file alias="256x256/plus_folder.png">icons/256x256/plus_folder.png</file>
</qresource>
<qresource prefix="default">
<file>style.qss</file>
</qresource>
</RCC>

dist/qt_themes/default/style.qss (vendored, new file, 35 lines)
View File

@@ -0,0 +1,35 @@
QPushButton#TogglableStatusBarButton {
color: #959595;
border: 1px solid transparent;
background-color: transparent;
padding: 0px 3px 0px 3px;
text-align: center;
}
QPushButton#TogglableStatusBarButton:checked {
color: #000000;
}
QPushButton#TogglableStatusBarButton:hover {
border: 1px solid #76797C;
}
QPushButton#RendererStatusBarButton {
color: #656565;
border: 1px solid transparent;
background-color: transparent;
padding: 0px 3px 0px 3px;
text-align: center;
}
QPushButton#RendererStatusBarButton:hover {
border: 1px solid #76797C;
}
QPushButton#RendererStatusBarButton:checked {
color: #e85c00;
}
QPushButton#RendererStatusBarButton:!checked{
color: #0066ff;
}

View File

@@ -1236,3 +1236,41 @@ QToolButton:disabled,
QPlainTextEdit:disabled {
background-color: #2b2e31;
}
QPushButton#TogglableStatusBarButton {
min-width: 0px;
color: #656565;
border: 1px solid transparent;
background-color: transparent;
padding: 0px 3px 0px 3px;
text-align: center;
}
QPushButton#TogglableStatusBarButton:checked {
color: #ffffff;
}
QPushButton#TogglableStatusBarButton:hover {
border: 1px solid #76797C;
}
QPushButton#RendererStatusBarButton {
min-width: 0px;
color: #656565;
border: 1px solid transparent;
background-color: transparent;
padding: 0px 3px 0px 3px;
text-align: center;
}
QPushButton#RendererStatusBarButton:hover {
border: 1px solid #76797C;
}
QPushButton#RendererStatusBarButton:checked {
color: #e85c00;
}
QPushButton#RendererStatusBarButton:!checked{
color: #00ccdd;
}

View File

@@ -15,14 +15,14 @@ add_library(core STATIC
constants.h
core.cpp
core.h
core_cpu.cpp
core_cpu.h
core_manager.cpp
core_manager.h
core_timing.cpp
core_timing.h
core_timing_util.cpp
core_timing_util.h
cpu_core_manager.cpp
cpu_core_manager.h
cpu_manager.cpp
cpu_manager.h
crypto/aes_util.cpp
crypto/aes_util.h
crypto/encryption_layer.cpp
@@ -158,6 +158,8 @@ add_library(core STATIC
hle/kernel/mutex.h
hle/kernel/object.cpp
hle/kernel/object.h
hle/kernel/physical_core.cpp
hle/kernel/physical_core.h
hle/kernel/process.cpp
hle/kernel/process.h
hle/kernel/process_capability.cpp

View File

@@ -10,11 +10,12 @@
#include "common/microprofile.h"
#include "core/arm/dynarmic/arm_dynarmic.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_manager.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/svc.h"
#include "core/hle/kernel/vm_manager.h"
#include "core/memory.h"
@@ -87,7 +88,7 @@ public:
if (GDBStub::IsServerEnabled()) {
parent.jit->HaltExecution();
parent.SetPC(pc);
Kernel::Thread* thread = Kernel::GetCurrentThread();
Kernel::Thread* const thread = parent.system.CurrentScheduler().GetCurrentThread();
parent.SaveContext(thread->GetContext());
GDBStub::Break();
GDBStub::SendTrap(thread, 5);

View File

@@ -2,10 +2,24 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#ifdef ARCHITECTURE_x86_64
#include "core/arm/dynarmic/arm_dynarmic.h"
#endif
#include "core/arm/exclusive_monitor.h"
#include "core/memory.h"
namespace Core {
ExclusiveMonitor::~ExclusiveMonitor() = default;
std::unique_ptr<Core::ExclusiveMonitor> MakeExclusiveMonitor(Memory::Memory& memory,
std::size_t num_cores) {
#ifdef ARCHITECTURE_x86_64
return std::make_unique<Core::DynarmicExclusiveMonitor>(memory, num_cores);
#else
// TODO(merry): Passthrough exclusive monitor
return nullptr;
#endif
}
} // namespace Core

View File

@@ -4,8 +4,14 @@
#pragma once
#include <memory>
#include "common/common_types.h"
namespace Memory {
class Memory;
}
namespace Core {
class ExclusiveMonitor {
@@ -22,4 +28,7 @@ public:
virtual bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) = 0;
};
std::unique_ptr<Core::ExclusiveMonitor> MakeExclusiveMonitor(Memory::Memory& memory,
std::size_t num_cores);
} // namespace Core

View File

@@ -9,6 +9,7 @@
#include "core/arm/unicorn/arm_unicorn.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/svc.h"
namespace Core {
@@ -177,7 +178,7 @@ void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) {
uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);
}
Kernel::Thread* thread = Kernel::GetCurrentThread();
Kernel::Thread* const thread = system.CurrentScheduler().GetCurrentThread();
SaveContext(thread->GetContext());
if (last_bkpt_hit || GDBStub::IsMemoryBreak() || GDBStub::GetCpuStepFlag()) {
last_bkpt_hit = false;

View File

@@ -11,9 +11,9 @@
#include "common/string_util.h"
#include "core/arm/exclusive_monitor.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_manager.h"
#include "core/core_timing.h"
#include "core/cpu_core_manager.h"
#include "core/cpu_manager.h"
#include "core/file_sys/bis_factory.h"
#include "core/file_sys/card_image.h"
#include "core/file_sys/mode.h"
@@ -28,6 +28,7 @@
#include "core/hardware_interrupt_manager.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/physical_core.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/thread.h"
@@ -113,16 +114,25 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
struct System::Impl {
explicit Impl(System& system)
: kernel{system}, fs_controller{system}, memory{system},
cpu_core_manager{system}, reporter{system}, applet_manager{system} {}
cpu_manager{system}, reporter{system}, applet_manager{system} {}
Cpu& CurrentCpuCore() {
return cpu_core_manager.GetCurrentCore();
CoreManager& CurrentCoreManager() {
return cpu_manager.GetCurrentCoreManager();
}
Kernel::PhysicalCore& CurrentPhysicalCore() {
const auto index = cpu_manager.GetActiveCoreIndex();
return kernel.PhysicalCore(index);
}
Kernel::PhysicalCore& GetPhysicalCore(std::size_t index) {
return kernel.PhysicalCore(index);
}
ResultStatus RunLoop(bool tight_loop) {
status = ResultStatus::Success;
cpu_core_manager.RunLoop(tight_loop);
cpu_manager.RunLoop(tight_loop);
return status;
}
@@ -131,8 +141,8 @@ struct System::Impl {
LOG_DEBUG(HW_Memory, "initialized OK");
core_timing.Initialize();
cpu_core_manager.Initialize();
kernel.Initialize();
cpu_manager.Initialize();
const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::system_clock::now().time_since_epoch());
@@ -205,7 +215,6 @@ struct System::Impl {
// Main process has been loaded and been made current.
// Begin GPU and CPU execution.
gpu_core->Start();
cpu_core_manager.StartThreads();
// Initialize cheat engine
if (cheat_engine) {
@@ -259,7 +268,9 @@ struct System::Impl {
is_powered_on = false;
exit_lock = false;
gpu_core->WaitIdle();
if (gpu_core) {
gpu_core->WaitIdle();
}
// Shutdown emulation session
renderer.reset();
@@ -272,7 +283,7 @@ struct System::Impl {
gpu_core.reset();
// Close all CPU/threading state
cpu_core_manager.Shutdown();
cpu_manager.Shutdown();
// Shutdown kernel and core timing
kernel.Shutdown();
@@ -342,7 +353,7 @@ struct System::Impl {
std::unique_ptr<Tegra::GPU> gpu_core;
std::unique_ptr<Hardware::InterruptManager> interrupt_manager;
Memory::Memory memory;
CpuCoreManager cpu_core_manager;
CpuManager cpu_manager;
bool is_powered_on = false;
bool exit_lock = false;
@@ -377,12 +388,12 @@ struct System::Impl {
System::System() : impl{std::make_unique<Impl>(*this)} {}
System::~System() = default;
Cpu& System::CurrentCpuCore() {
return impl->CurrentCpuCore();
CoreManager& System::CurrentCoreManager() {
return impl->CurrentCoreManager();
}
const Cpu& System::CurrentCpuCore() const {
return impl->CurrentCpuCore();
const CoreManager& System::CurrentCoreManager() const {
return impl->CurrentCoreManager();
}
System::ResultStatus System::RunLoop(bool tight_loop) {
@@ -394,7 +405,7 @@ System::ResultStatus System::SingleStep() {
}
void System::InvalidateCpuInstructionCaches() {
impl->cpu_core_manager.InvalidateAllInstructionCaches();
impl->kernel.InvalidateAllInstructionCaches();
}
System::ResultStatus System::Load(Frontend::EmuWindow& emu_window, const std::string& filepath) {
@@ -406,13 +417,11 @@ bool System::IsPoweredOn() const {
}
void System::PrepareReschedule() {
CurrentCpuCore().PrepareReschedule();
impl->CurrentPhysicalCore().Stop();
}
void System::PrepareReschedule(const u32 core_index) {
if (core_index < GlobalScheduler().CpuCoresCount()) {
CpuCore(core_index).PrepareReschedule();
}
impl->kernel.PrepareReschedule(core_index);
}
PerfStatsResults System::GetAndResetPerfStats() {
@@ -428,31 +437,31 @@ const TelemetrySession& System::TelemetrySession() const {
}
ARM_Interface& System::CurrentArmInterface() {
return CurrentCpuCore().ArmInterface();
return impl->CurrentPhysicalCore().ArmInterface();
}
const ARM_Interface& System::CurrentArmInterface() const {
return CurrentCpuCore().ArmInterface();
return impl->CurrentPhysicalCore().ArmInterface();
}
std::size_t System::CurrentCoreIndex() const {
return CurrentCpuCore().CoreIndex();
return impl->cpu_manager.GetActiveCoreIndex();
}
Kernel::Scheduler& System::CurrentScheduler() {
return CurrentCpuCore().Scheduler();
return impl->CurrentPhysicalCore().Scheduler();
}
const Kernel::Scheduler& System::CurrentScheduler() const {
return CurrentCpuCore().Scheduler();
return impl->CurrentPhysicalCore().Scheduler();
}
Kernel::Scheduler& System::Scheduler(std::size_t core_index) {
return CpuCore(core_index).Scheduler();
return impl->GetPhysicalCore(core_index).Scheduler();
}
const Kernel::Scheduler& System::Scheduler(std::size_t core_index) const {
return CpuCore(core_index).Scheduler();
return impl->GetPhysicalCore(core_index).Scheduler();
}
/// Gets the global scheduler
@@ -474,28 +483,28 @@ const Kernel::Process* System::CurrentProcess() const {
}
ARM_Interface& System::ArmInterface(std::size_t core_index) {
return CpuCore(core_index).ArmInterface();
return impl->GetPhysicalCore(core_index).ArmInterface();
}
const ARM_Interface& System::ArmInterface(std::size_t core_index) const {
return CpuCore(core_index).ArmInterface();
return impl->GetPhysicalCore(core_index).ArmInterface();
}
Cpu& System::CpuCore(std::size_t core_index) {
return impl->cpu_core_manager.GetCore(core_index);
CoreManager& System::GetCoreManager(std::size_t core_index) {
return impl->cpu_manager.GetCoreManager(core_index);
}
const Cpu& System::CpuCore(std::size_t core_index) const {
const CoreManager& System::GetCoreManager(std::size_t core_index) const {
ASSERT(core_index < NUM_CPU_CORES);
return impl->cpu_core_manager.GetCore(core_index);
return impl->cpu_manager.GetCoreManager(core_index);
}
ExclusiveMonitor& System::Monitor() {
return impl->cpu_core_manager.GetExclusiveMonitor();
return impl->kernel.GetExclusiveMonitor();
}
const ExclusiveMonitor& System::Monitor() const {
return impl->cpu_core_manager.GetExclusiveMonitor();
return impl->kernel.GetExclusiveMonitor();
}
Memory::Memory& System::Memory() {

View File

@@ -93,7 +93,7 @@ class Memory;
namespace Core {
class ARM_Interface;
class Cpu;
class CoreManager;
class ExclusiveMonitor;
class FrameLimiter;
class PerfStats;
@@ -218,10 +218,10 @@ public:
const ARM_Interface& ArmInterface(std::size_t core_index) const;
/// Gets a CPU interface to the CPU core with the specified index
Cpu& CpuCore(std::size_t core_index);
CoreManager& GetCoreManager(std::size_t core_index);
/// Gets a CPU interface to the CPU core with the specified index
const Cpu& CpuCore(std::size_t core_index) const;
const CoreManager& GetCoreManager(std::size_t core_index) const;
/// Gets a reference to the exclusive monitor
ExclusiveMonitor& Monitor();
@@ -364,10 +364,10 @@ private:
System();
/// Returns the currently running CPU core
Cpu& CurrentCpuCore();
CoreManager& CurrentCoreManager();
/// Returns the currently running CPU core
const Cpu& CurrentCpuCore() const;
const CoreManager& CurrentCoreManager() const;
/**
* Initialize the emulated system.

View File

@@ -1,127 +0,0 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <condition_variable>
#include <mutex>
#include "common/logging/log.h"
#ifdef ARCHITECTURE_x86_64
#include "core/arm/dynarmic/arm_dynarmic.h"
#endif
#include "core/arm/exclusive_monitor.h"
#include "core/arm/unicorn/arm_unicorn.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_timing.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/lock.h"
#include "core/settings.h"
namespace Core {
void CpuBarrier::NotifyEnd() {
std::unique_lock lock{mutex};
end = true;
condition.notify_all();
}
bool CpuBarrier::Rendezvous() {
if (!Settings::values.use_multi_core) {
// Meaningless when running in single-core mode
return true;
}
if (!end) {
std::unique_lock lock{mutex};
--cores_waiting;
if (!cores_waiting) {
cores_waiting = NUM_CPU_CORES;
condition.notify_all();
return true;
}
condition.wait(lock);
return true;
}
return false;
}
Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
std::size_t core_index)
: cpu_barrier{cpu_barrier}, global_scheduler{system.GlobalScheduler()},
core_timing{system.CoreTiming()}, core_index{core_index} {
#ifdef ARCHITECTURE_x86_64
arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index);
#else
arm_interface = std::make_unique<ARM_Unicorn>(system);
LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
#endif
scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface, core_index);
}
Cpu::~Cpu() = default;
std::unique_ptr<ExclusiveMonitor> Cpu::MakeExclusiveMonitor(
[[maybe_unused]] Memory::Memory& memory, [[maybe_unused]] std::size_t num_cores) {
#ifdef ARCHITECTURE_x86_64
return std::make_unique<DynarmicExclusiveMonitor>(memory, num_cores);
#else
// TODO(merry): Passthrough exclusive monitor
return nullptr;
#endif
}
void Cpu::RunLoop(bool tight_loop) {
// Wait for all other CPU cores to complete the previous slice, such that they run in lock-step
if (!cpu_barrier.Rendezvous()) {
// If rendezvous failed, session has been killed
return;
}
Reschedule();
// If we don't have a currently active thread then don't execute instructions,
// instead advance to the next event and try to yield to the next thread
if (Kernel::GetCurrentThread() == nullptr) {
LOG_TRACE(Core, "Core-{} idling", core_index);
core_timing.Idle();
} else {
if (tight_loop) {
arm_interface->Run();
} else {
arm_interface->Step();
}
// We are stopping a run, exclusive state must be cleared
arm_interface->ClearExclusiveState();
}
core_timing.Advance();
Reschedule();
}
void Cpu::SingleStep() {
return RunLoop(false);
}
void Cpu::PrepareReschedule() {
arm_interface->PrepareReschedule();
}
void Cpu::Reschedule() {
// Lock the global kernel mutex when we manipulate the HLE state
std::lock_guard lock(HLE::g_hle_lock);
global_scheduler.SelectThread(core_index);
scheduler->TryDoContextSwitch();
}
void Cpu::Shutdown() {
scheduler->Shutdown();
}
} // namespace Core

View File

@@ -1,120 +0,0 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <atomic>
#include <condition_variable>
#include <cstddef>
#include <memory>
#include <mutex>
#include "common/common_types.h"
namespace Kernel {
class GlobalScheduler;
class Scheduler;
} // namespace Kernel
namespace Core {
class System;
}
namespace Core::Timing {
class CoreTiming;
}
namespace Memory {
class Memory;
}
namespace Core {
class ARM_Interface;
class ExclusiveMonitor;
constexpr unsigned NUM_CPU_CORES{4};
class CpuBarrier {
public:
bool IsAlive() const {
return !end;
}
void NotifyEnd();
bool Rendezvous();
private:
unsigned cores_waiting{NUM_CPU_CORES};
std::mutex mutex;
std::condition_variable condition;
std::atomic<bool> end{};
};
class Cpu {
public:
Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
std::size_t core_index);
~Cpu();
void RunLoop(bool tight_loop = true);
void SingleStep();
void PrepareReschedule();
ARM_Interface& ArmInterface() {
return *arm_interface;
}
const ARM_Interface& ArmInterface() const {
return *arm_interface;
}
Kernel::Scheduler& Scheduler() {
return *scheduler;
}
const Kernel::Scheduler& Scheduler() const {
return *scheduler;
}
bool IsMainCore() const {
return core_index == 0;
}
std::size_t CoreIndex() const {
return core_index;
}
void Shutdown();
/**
* Creates an exclusive monitor to handle exclusive reads/writes.
*
* @param memory The current memory subsystem that the monitor may wish
* to keep track of.
*
* @param num_cores The number of cores to assume about the CPU.
*
* @returns The constructed exclusive monitor instance, or nullptr if the current
* CPU backend is unable to use an exclusive monitor.
*/
static std::unique_ptr<ExclusiveMonitor> MakeExclusiveMonitor(Memory::Memory& memory,
std::size_t num_cores);
private:
void Reschedule();
std::unique_ptr<ARM_Interface> arm_interface;
CpuBarrier& cpu_barrier;
Kernel::GlobalScheduler& global_scheduler;
std::unique_ptr<Kernel::Scheduler> scheduler;
Timing::CoreTiming& core_timing;
std::atomic<bool> reschedule_pending = false;
std::size_t core_index;
};
} // namespace Core

src/core/core_manager.cpp (new file, 70 lines)
View File

@@ -0,0 +1,70 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <condition_variable>
#include <mutex>
#include "common/logging/log.h"
#ifdef ARCHITECTURE_x86_64
#include "core/arm/dynarmic/arm_dynarmic.h"
#endif
#include "core/arm/exclusive_monitor.h"
#include "core/arm/unicorn/arm_unicorn.h"
#include "core/core.h"
#include "core/core_manager.h"
#include "core/core_timing.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/physical_core.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/lock.h"
#include "core/settings.h"
namespace Core {
CoreManager::CoreManager(System& system, std::size_t core_index)
: global_scheduler{system.GlobalScheduler()}, physical_core{system.Kernel().PhysicalCore(
core_index)},
core_timing{system.CoreTiming()}, core_index{core_index} {}
CoreManager::~CoreManager() = default;
void CoreManager::RunLoop(bool tight_loop) {
Reschedule();
// If we don't have a currently active thread then don't execute instructions,
// instead advance to the next event and try to yield to the next thread
if (Kernel::GetCurrentThread() == nullptr) {
LOG_TRACE(Core, "Core-{} idling", core_index);
core_timing.Idle();
} else {
if (tight_loop) {
physical_core.Run();
} else {
physical_core.Step();
}
}
core_timing.Advance();
Reschedule();
}
void CoreManager::SingleStep() {
return RunLoop(false);
}
void CoreManager::PrepareReschedule() {
physical_core.Stop();
}
void CoreManager::Reschedule() {
// Lock the global kernel mutex when we manipulate the HLE state
std::lock_guard lock(HLE::g_hle_lock);
global_scheduler.SelectThread(core_index);
physical_core.Scheduler().TryDoContextSwitch();
}
} // namespace Core

src/core/core_manager.h (new file, 63 lines)
View File

@@ -0,0 +1,63 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <atomic>
#include <cstddef>
#include <memory>
#include "common/common_types.h"
namespace Kernel {
class GlobalScheduler;
class PhysicalCore;
} // namespace Kernel
namespace Core {
class System;
}
namespace Core::Timing {
class CoreTiming;
}
namespace Memory {
class Memory;
}
namespace Core {
constexpr unsigned NUM_CPU_CORES{4};
class CoreManager {
public:
CoreManager(System& system, std::size_t core_index);
~CoreManager();
void RunLoop(bool tight_loop = true);
void SingleStep();
void PrepareReschedule();
bool IsMainCore() const {
return core_index == 0;
}
std::size_t CoreIndex() const {
return core_index;
}
private:
void Reschedule();
Kernel::GlobalScheduler& global_scheduler;
Kernel::PhysicalCore& physical_core;
Timing::CoreTiming& core_timing;
std::atomic<bool> reschedule_pending = false;
std::size_t core_index;
};
} // namespace Core

View File

@@ -1,152 +0,0 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "core/arm/exclusive_monitor.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_timing.h"
#include "core/cpu_core_manager.h"
#include "core/gdbstub/gdbstub.h"
#include "core/settings.h"
namespace Core {
namespace {
void RunCpuCore(const System& system, Cpu& cpu_state) {
while (system.IsPoweredOn()) {
cpu_state.RunLoop(true);
}
}
} // Anonymous namespace
CpuCoreManager::CpuCoreManager(System& system) : system{system} {}
CpuCoreManager::~CpuCoreManager() = default;
void CpuCoreManager::Initialize() {
barrier = std::make_unique<CpuBarrier>();
exclusive_monitor = Cpu::MakeExclusiveMonitor(system.Memory(), cores.size());
for (std::size_t index = 0; index < cores.size(); ++index) {
cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index);
}
}
void CpuCoreManager::StartThreads() {
// Create threads for CPU cores 1-3, and build thread_to_cpu map
// CPU core 0 is run on the main thread
thread_to_cpu[std::this_thread::get_id()] = cores[0].get();
if (!Settings::values.use_multi_core) {
return;
}
for (std::size_t index = 0; index < core_threads.size(); ++index) {
core_threads[index] = std::make_unique<std::thread>(RunCpuCore, std::cref(system),
std::ref(*cores[index + 1]));
thread_to_cpu[core_threads[index]->get_id()] = cores[index + 1].get();
}
}
void CpuCoreManager::Shutdown() {
barrier->NotifyEnd();
if (Settings::values.use_multi_core) {
for (auto& thread : core_threads) {
thread->join();
thread.reset();
}
}
thread_to_cpu.clear();
for (auto& cpu_core : cores) {
cpu_core->Shutdown();
cpu_core.reset();
}
exclusive_monitor.reset();
barrier.reset();
}
Cpu& CpuCoreManager::GetCore(std::size_t index) {
return *cores.at(index);
}
const Cpu& CpuCoreManager::GetCore(std::size_t index) const {
return *cores.at(index);
}
ExclusiveMonitor& CpuCoreManager::GetExclusiveMonitor() {
return *exclusive_monitor;
}
const ExclusiveMonitor& CpuCoreManager::GetExclusiveMonitor() const {
return *exclusive_monitor;
}
Cpu& CpuCoreManager::GetCurrentCore() {
if (Settings::values.use_multi_core) {
const auto& search = thread_to_cpu.find(std::this_thread::get_id());
ASSERT(search != thread_to_cpu.end());
ASSERT(search->second);
return *search->second;
}
// Otherwise, use single-threaded mode active_core variable
return *cores[active_core];
}
const Cpu& CpuCoreManager::GetCurrentCore() const {
if (Settings::values.use_multi_core) {
const auto& search = thread_to_cpu.find(std::this_thread::get_id());
ASSERT(search != thread_to_cpu.end());
ASSERT(search->second);
return *search->second;
}
// Otherwise, use single-threaded mode active_core variable
return *cores[active_core];
}
void CpuCoreManager::RunLoop(bool tight_loop) {
// Update thread_to_cpu in case Core 0 is run from a different host thread
thread_to_cpu[std::this_thread::get_id()] = cores[0].get();
if (GDBStub::IsServerEnabled()) {
GDBStub::HandlePacket();
// If the loop is halted and we want to step, use a tiny (1) number of instructions to
// execute. Otherwise, get out of the loop function.
if (GDBStub::GetCpuHaltFlag()) {
if (GDBStub::GetCpuStepFlag()) {
tight_loop = false;
} else {
return;
}
}
}
auto& core_timing = system.CoreTiming();
core_timing.ResetRun();
bool keep_running{};
do {
keep_running = false;
for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) {
core_timing.SwitchContext(active_core);
if (core_timing.CanCurrentContextRun()) {
cores[active_core]->RunLoop(tight_loop);
}
keep_running |= core_timing.CanCurrentContextRun();
}
} while (keep_running);
if (GDBStub::IsServerEnabled()) {
GDBStub::SetCpuStepFlag(false);
}
}
void CpuCoreManager::InvalidateAllInstructionCaches() {
for (auto& cpu : cores) {
cpu->ArmInterface().ClearInstructionCache();
}
}
} // namespace Core

View File

@@ -1,62 +0,0 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <map>
#include <memory>
#include <thread>
namespace Core {
class Cpu;
class CpuBarrier;
class ExclusiveMonitor;
class System;
class CpuCoreManager {
public:
explicit CpuCoreManager(System& system);
CpuCoreManager(const CpuCoreManager&) = delete;
CpuCoreManager(CpuCoreManager&&) = delete;
~CpuCoreManager();
CpuCoreManager& operator=(const CpuCoreManager&) = delete;
CpuCoreManager& operator=(CpuCoreManager&&) = delete;
void Initialize();
void StartThreads();
void Shutdown();
Cpu& GetCore(std::size_t index);
const Cpu& GetCore(std::size_t index) const;
Cpu& GetCurrentCore();
const Cpu& GetCurrentCore() const;
ExclusiveMonitor& GetExclusiveMonitor();
const ExclusiveMonitor& GetExclusiveMonitor() const;
void RunLoop(bool tight_loop);
void InvalidateAllInstructionCaches();
private:
static constexpr std::size_t NUM_CPU_CORES = 4;
std::unique_ptr<ExclusiveMonitor> exclusive_monitor;
std::unique_ptr<CpuBarrier> barrier;
std::array<std::unique_ptr<Cpu>, NUM_CPU_CORES> cores;
std::array<std::unique_ptr<std::thread>, NUM_CPU_CORES - 1> core_threads;
std::size_t active_core{}; ///< Active core, only used in single thread mode
/// Map of guest threads to CPU cores
std::map<std::thread::id, Cpu*> thread_to_cpu;
System& system;
};
} // namespace Core

src/core/cpu_manager.cpp (new file, 81 lines)
View File

@@ -0,0 +1,81 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/arm/exclusive_monitor.h"
#include "core/core.h"
#include "core/core_manager.h"
#include "core/core_timing.h"
#include "core/cpu_manager.h"
#include "core/gdbstub/gdbstub.h"
namespace Core {
CpuManager::CpuManager(System& system) : system{system} {}
CpuManager::~CpuManager() = default;
void CpuManager::Initialize() {
for (std::size_t index = 0; index < core_managers.size(); ++index) {
core_managers[index] = std::make_unique<CoreManager>(system, index);
}
}
void CpuManager::Shutdown() {
for (auto& cpu_core : core_managers) {
cpu_core.reset();
}
}
CoreManager& CpuManager::GetCoreManager(std::size_t index) {
return *core_managers.at(index);
}
const CoreManager& CpuManager::GetCoreManager(std::size_t index) const {
return *core_managers.at(index);
}
CoreManager& CpuManager::GetCurrentCoreManager() {
// Otherwise, use single-threaded mode active_core variable
return *core_managers[active_core];
}
const CoreManager& CpuManager::GetCurrentCoreManager() const {
// Otherwise, use single-threaded mode active_core variable
return *core_managers[active_core];
}
void CpuManager::RunLoop(bool tight_loop) {
if (GDBStub::IsServerEnabled()) {
GDBStub::HandlePacket();
// If the loop is halted and we want to step, use a tiny (1) number of instructions to
// execute. Otherwise, get out of the loop function.
if (GDBStub::GetCpuHaltFlag()) {
if (GDBStub::GetCpuStepFlag()) {
tight_loop = false;
} else {
return;
}
}
}
auto& core_timing = system.CoreTiming();
core_timing.ResetRun();
bool keep_running{};
do {
keep_running = false;
for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) {
core_timing.SwitchContext(active_core);
if (core_timing.CanCurrentContextRun()) {
core_managers[active_core]->RunLoop(tight_loop);
}
keep_running |= core_timing.CanCurrentContextRun();
}
} while (keep_running);
if (GDBStub::IsServerEnabled()) {
GDBStub::SetCpuStepFlag(false);
}
}
} // namespace Core

src/core/cpu_manager.h (new file, 50 lines)
View File

@@ -0,0 +1,50 @@
// Copyright 2018 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <memory>
namespace Core {
class CoreManager;
class System;
class CpuManager {
public:
explicit CpuManager(System& system);
CpuManager(const CpuManager&) = delete;
CpuManager(CpuManager&&) = delete;
~CpuManager();
CpuManager& operator=(const CpuManager&) = delete;
CpuManager& operator=(CpuManager&&) = delete;
void Initialize();
void Shutdown();
CoreManager& GetCoreManager(std::size_t index);
const CoreManager& GetCoreManager(std::size_t index) const;
CoreManager& GetCurrentCoreManager();
const CoreManager& GetCurrentCoreManager() const;
std::size_t GetActiveCoreIndex() const {
return active_core;
}
void RunLoop(bool tight_loop);
private:
static constexpr std::size_t NUM_CPU_CORES = 4;
std::array<std::unique_ptr<CoreManager>, NUM_CPU_CORES> core_managers;
std::size_t active_core{}; ///< Active core, only used in single thread mode
System& system;
};
} // namespace Core

View File

@@ -75,6 +75,13 @@ public:
return nullptr;
}
/// Returns if window is shown (not minimized)
virtual bool IsShown() const = 0;
/// Retrieves Vulkan specific handlers from the window
virtual void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
void* surface) const = 0;
/**
* Signal that a touch pressed event has occurred (e.g. mouse click pressed)
* @param framebuffer_x Framebuffer x-coordinate that was pressed

View File

@@ -15,6 +15,13 @@
namespace Input {
enum class AnalogDirection : u8 {
RIGHT,
LEFT,
UP,
DOWN,
};
/// An abstract class template for an input device (a button, an analog input, etc.).
template <typename StatusType>
class InputDevice {
@@ -23,6 +30,9 @@ public:
virtual StatusType GetStatus() const {
return {};
}
virtual bool GetAnalogDirectionStatus(AnalogDirection direction) const {
return {};
}
};
/// An abstract class template for a factory that can create input devices.

View File

@@ -35,7 +35,7 @@
#include "common/swap.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_manager.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/scheduler.h"

View File

@@ -8,7 +8,6 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/hle/kernel/address_arbiter.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/scheduler.h"

View File

@@ -284,13 +284,18 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
std::vector<u8> HLERequestContext::ReadBuffer(int buffer_index) const {
std::vector<u8> buffer;
const bool is_buffer_a{BufferDescriptorA().size() && BufferDescriptorA()[buffer_index].Size()};
const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&
BufferDescriptorA()[buffer_index].Size()};
auto& memory = Core::System::GetInstance().Memory();
if (is_buffer_a) {
ASSERT_MSG(BufferDescriptorA().size() > buffer_index,
"BufferDescriptorA invalid buffer_index {}", buffer_index);
buffer.resize(BufferDescriptorA()[buffer_index].Size());
memory.ReadBlock(BufferDescriptorA()[buffer_index].Address(), buffer.data(), buffer.size());
} else {
ASSERT_MSG(BufferDescriptorX().size() > buffer_index,
"BufferDescriptorX invalid buffer_index {}", buffer_index);
buffer.resize(BufferDescriptorX()[buffer_index].Size());
memory.ReadBlock(BufferDescriptorX()[buffer_index].Address(), buffer.data(), buffer.size());
}
@@ -305,7 +310,8 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
return 0;
}
const bool is_buffer_b{BufferDescriptorB().size() && BufferDescriptorB()[buffer_index].Size()};
const bool is_buffer_b{BufferDescriptorB().size() > buffer_index &&
BufferDescriptorB()[buffer_index].Size()};
const std::size_t buffer_size{GetWriteBufferSize(buffer_index)};
if (size > buffer_size) {
LOG_CRITICAL(Core, "size ({:016X}) is greater than buffer_size ({:016X})", size,
@@ -315,8 +321,16 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
auto& memory = Core::System::GetInstance().Memory();
if (is_buffer_b) {
ASSERT_MSG(BufferDescriptorB().size() > buffer_index,
"BufferDescriptorB invalid buffer_index {}", buffer_index);
ASSERT_MSG(BufferDescriptorB()[buffer_index].Size() >= size,
"BufferDescriptorB buffer_index {} is not large enough", buffer_index);
memory.WriteBlock(BufferDescriptorB()[buffer_index].Address(), buffer, size);
} else {
ASSERT_MSG(BufferDescriptorC().size() > buffer_index,
"BufferDescriptorC invalid buffer_index {}", buffer_index);
ASSERT_MSG(BufferDescriptorC()[buffer_index].Size() >= size,
"BufferDescriptorC buffer_index {} is not large enough", buffer_index);
memory.WriteBlock(BufferDescriptorC()[buffer_index].Address(), buffer, size);
}
@@ -324,15 +338,35 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
}
std::size_t HLERequestContext::GetReadBufferSize(int buffer_index) const {
const bool is_buffer_a{BufferDescriptorA().size() && BufferDescriptorA()[buffer_index].Size()};
return is_buffer_a ? BufferDescriptorA()[buffer_index].Size()
: BufferDescriptorX()[buffer_index].Size();
const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&
BufferDescriptorA()[buffer_index].Size()};
if (is_buffer_a) {
ASSERT_MSG(BufferDescriptorA().size() > buffer_index,
"BufferDescriptorA invalid buffer_index {}", buffer_index);
ASSERT_MSG(BufferDescriptorA()[buffer_index].Size() > 0,
"BufferDescriptorA buffer_index {} is empty", buffer_index);
return BufferDescriptorA()[buffer_index].Size();
} else {
ASSERT_MSG(BufferDescriptorX().size() > buffer_index,
"BufferDescriptorX invalid buffer_index {}", buffer_index);
ASSERT_MSG(BufferDescriptorX()[buffer_index].Size() > 0,
"BufferDescriptorX buffer_index {} is empty", buffer_index);
return BufferDescriptorX()[buffer_index].Size();
}
}
std::size_t HLERequestContext::GetWriteBufferSize(int buffer_index) const {
const bool is_buffer_b{BufferDescriptorB().size() && BufferDescriptorB()[buffer_index].Size()};
return is_buffer_b ? BufferDescriptorB()[buffer_index].Size()
: BufferDescriptorC()[buffer_index].Size();
const bool is_buffer_b{BufferDescriptorB().size() > buffer_index &&
BufferDescriptorB()[buffer_index].Size()};
if (is_buffer_b) {
ASSERT_MSG(BufferDescriptorB().size() > buffer_index,
"BufferDescriptorB invalid buffer_index {}", buffer_index);
return BufferDescriptorB()[buffer_index].Size();
} else {
ASSERT_MSG(BufferDescriptorC().size() > buffer_index,
"BufferDescriptorC invalid buffer_index {}", buffer_index);
return BufferDescriptorC()[buffer_index].Size();
}
}
std::string HLERequestContext::Description() const {

View File

@@ -3,13 +3,15 @@
// Refer to the license.txt file included.
#include <atomic>
#include <functional>
#include <memory>
#include <mutex>
#include <utility>
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/arm/arm_interface.h"
#include "core/arm/exclusive_monitor.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
@@ -17,6 +19,7 @@
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/physical_core.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/hle/kernel/scheduler.h"
@@ -98,6 +101,7 @@ struct KernelCore::Impl {
void Initialize(KernelCore& kernel) {
Shutdown();
InitializePhysicalCores();
InitializeSystemResourceLimit(kernel);
InitializeThreads();
InitializePreemption();
@@ -121,6 +125,21 @@ struct KernelCore::Impl {
global_scheduler.Shutdown();
named_ports.clear();
for (auto& core : cores) {
core.Shutdown();
}
cores.clear();
exclusive_monitor.reset();
}
void InitializePhysicalCores() {
exclusive_monitor =
Core::MakeExclusiveMonitor(system.Memory(), global_scheduler.CpuCoresCount());
for (std::size_t i = 0; i < global_scheduler.CpuCoresCount(); i++) {
cores.emplace_back(system, i, *exclusive_monitor);
}
}
// Creates the default system resource limit
@@ -186,6 +205,9 @@ struct KernelCore::Impl {
/// the ConnectToPort SVC.
NamedPortTable named_ports;
std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor;
std::vector<Kernel::PhysicalCore> cores;
// System context
Core::System& system;
};
@@ -240,6 +262,34 @@ const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const {
return impl->global_scheduler;
}
Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) {
return impl->cores[id];
}
const Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) const {
return impl->cores[id];
}
Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() {
return *impl->exclusive_monitor;
}
const Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() const {
return *impl->exclusive_monitor;
}
void KernelCore::InvalidateAllInstructionCaches() {
for (std::size_t i = 0; i < impl->global_scheduler.CpuCoresCount(); i++) {
PhysicalCore(i).ArmInterface().ClearInstructionCache();
}
}
void KernelCore::PrepareReschedule(std::size_t id) {
if (id < impl->global_scheduler.CpuCoresCount()) {
impl->cores[id].Stop();
}
}
void KernelCore::AddNamedPort(std::string name, std::shared_ptr<ClientPort> port) {
impl->named_ports.emplace(std::move(name), std::move(port));
}

View File

@@ -11,8 +11,9 @@
#include "core/hle/kernel/object.h"
namespace Core {
class ExclusiveMonitor;
class System;
}
} // namespace Core
namespace Core::Timing {
class CoreTiming;
@@ -25,6 +26,7 @@ class AddressArbiter;
class ClientPort;
class GlobalScheduler;
class HandleTable;
class PhysicalCore;
class Process;
class ResourceLimit;
class Thread;
@@ -84,6 +86,21 @@ public:
/// Gets the sole instance of the global scheduler
const Kernel::GlobalScheduler& GlobalScheduler() const;
/// Gets an instance of the respective physical CPU core.
Kernel::PhysicalCore& PhysicalCore(std::size_t id);
/// Gets an instance of the respective physical CPU core.
const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const;
/// Stops execution of the core identified by 'id' in order to reschedule a new thread.
void PrepareReschedule(std::size_t id);
Core::ExclusiveMonitor& GetExclusiveMonitor();
const Core::ExclusiveMonitor& GetExclusiveMonitor() const;
void InvalidateAllInstructionCaches();
/// Adds a port to the named port table
void AddNamedPort(std::string name, std::shared_ptr<ClientPort> port);

View File

@@ -0,0 +1,51 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/logging/log.h"
#include "core/arm/arm_interface.h"
#ifdef ARCHITECTURE_x86_64
#include "core/arm/dynarmic/arm_dynarmic.h"
#endif
#include "core/arm/exclusive_monitor.h"
#include "core/arm/unicorn/arm_unicorn.h"
#include "core/core.h"
#include "core/hle/kernel/physical_core.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/thread.h"
namespace Kernel {
PhysicalCore::PhysicalCore(Core::System& system, std::size_t id,
Core::ExclusiveMonitor& exclusive_monitor)
: core_index{id} {
#ifdef ARCHITECTURE_x86_64
arm_interface = std::make_unique<Core::ARM_Dynarmic>(system, exclusive_monitor, core_index);
#else
arm_interface = std::make_unique<Core::ARM_Unicorn>(system);
LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
#endif
scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface, core_index);
}
PhysicalCore::~PhysicalCore() = default;
void PhysicalCore::Run() {
arm_interface->Run();
arm_interface->ClearExclusiveState();
}
void PhysicalCore::Step() {
arm_interface->Step();
}
void PhysicalCore::Stop() {
arm_interface->PrepareReschedule();
}
void PhysicalCore::Shutdown() {
scheduler->Shutdown();
}
} // namespace Kernel

View File

@@ -0,0 +1,77 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <memory>
namespace Kernel {
class Scheduler;
} // namespace Kernel
namespace Core {
class ARM_Interface;
class ExclusiveMonitor;
class System;
} // namespace Core
namespace Kernel {
class PhysicalCore {
public:
PhysicalCore(Core::System& system, std::size_t id, Core::ExclusiveMonitor& exclusive_monitor);
~PhysicalCore();
PhysicalCore(const PhysicalCore&) = delete;
PhysicalCore& operator=(const PhysicalCore&) = delete;
PhysicalCore(PhysicalCore&&) = default;
PhysicalCore& operator=(PhysicalCore&&) = default;
/// Executes the current JIT state.
void Run();
/// Executes a single instruction in the current JIT.
void Step();
/// Stops JIT execution and exits.
void Stop();
/// Shuts down this physical core.
void Shutdown();
Core::ARM_Interface& ArmInterface() {
return *arm_interface;
}
const Core::ARM_Interface& ArmInterface() const {
return *arm_interface;
}
bool IsMainCore() const {
return core_index == 0;
}
bool IsSystemCore() const {
return core_index == 3;
}
std::size_t CoreIndex() const {
return core_index;
}
Kernel::Scheduler& Scheduler() {
return *scheduler;
}
const Kernel::Scheduler& Scheduler() const {
return *scheduler;
}
private:
std::size_t core_index;
std::unique_ptr<Core::ARM_Interface> arm_interface;
std::unique_ptr<Kernel::Scheduler> scheduler;
};
} // namespace Kernel
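A brief note on the two convenience predicates above: Horizon reserves CPU core 3 for the operating system and system services while applications run on cores 0-2, which is what IsSystemCore() (core_index == 3) and IsMainCore() (core_index == 0) encode.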

View File

@@ -14,7 +14,6 @@
#include "common/logging/log.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_timing.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"

View File

@@ -15,7 +15,7 @@
#include "common/string_util.h"
#include "core/arm/exclusive_monitor.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_manager.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/hle/kernel/address_arbiter.h"
@@ -1863,10 +1863,14 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd
}
auto& kernel = system.Kernel();
auto transfer_mem_handle = TransferMemory::Create(kernel, addr, size, perms);
auto transfer_mem_handle = TransferMemory::Create(kernel, system.Memory(), addr, size, perms);
if (const auto reserve_result{transfer_mem_handle->Reserve()}; reserve_result.IsError()) {
return reserve_result;
}
auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
const auto result = handle_table.Create(std::move(transfer_mem_handle));
const auto result{handle_table.Create(std::move(transfer_mem_handle))};
if (result.Failed()) {
return result.Code();
}

View File

@@ -13,7 +13,6 @@
#include "common/thread_queue_list.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/hle/kernel/errors.h"
@@ -356,7 +355,7 @@ void Thread::SetActivity(ThreadActivity value) {
// Set status if not waiting
if (status == ThreadStatus::Ready || status == ThreadStatus::Running) {
SetStatus(ThreadStatus::Paused);
Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule();
kernel.PrepareReschedule(processor_id);
}
} else if (status == ThreadStatus::Paused) {
// Ready to reschedule

View File

@@ -8,15 +8,23 @@
#include "core/hle/kernel/shared_memory.h"
#include "core/hle/kernel/transfer_memory.h"
#include "core/hle/result.h"
#include "core/memory.h"
namespace Kernel {
TransferMemory::TransferMemory(KernelCore& kernel) : Object{kernel} {}
TransferMemory::~TransferMemory() = default;
TransferMemory::TransferMemory(KernelCore& kernel, Memory::Memory& memory)
: Object{kernel}, memory{memory} {}
std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel, VAddr base_address,
u64 size, MemoryPermission permissions) {
std::shared_ptr<TransferMemory> transfer_memory{std::make_shared<TransferMemory>(kernel)};
TransferMemory::~TransferMemory() {
// Release memory region when transfer memory is destroyed
Reset();
}
std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel, Memory::Memory& memory,
VAddr base_address, u64 size,
MemoryPermission permissions) {
std::shared_ptr<TransferMemory> transfer_memory{
std::make_shared<TransferMemory>(kernel, memory)};
transfer_memory->base_address = base_address;
transfer_memory->memory_size = size;
@@ -27,7 +35,7 @@ std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel, VAddr
}
const u8* TransferMemory::GetPointer() const {
return backing_block.get()->data();
return memory.GetPointer(base_address);
}
u64 TransferMemory::GetSize() const {
@@ -62,6 +70,52 @@ ResultCode TransferMemory::MapMemory(VAddr address, u64 size, MemoryPermission p
return RESULT_SUCCESS;
}
ResultCode TransferMemory::Reserve() {
auto& vm_manager{owner_process->VMManager()};
const auto check_range_result{vm_manager.CheckRangeState(
base_address, memory_size, MemoryState::FlagTransfer | MemoryState::FlagMemoryPoolAllocated,
MemoryState::FlagTransfer | MemoryState::FlagMemoryPoolAllocated, VMAPermission::All,
VMAPermission::ReadWrite, MemoryAttribute::Mask, MemoryAttribute::None,
MemoryAttribute::IpcAndDeviceMapped)};
if (check_range_result.Failed()) {
return check_range_result.Code();
}
auto [state_, permissions_, attribute] = *check_range_result;
if (const auto result{vm_manager.ReprotectRange(
base_address, memory_size, SharedMemory::ConvertPermissions(owner_permissions))};
result.IsError()) {
return result;
}
return vm_manager.SetMemoryAttribute(base_address, memory_size, MemoryAttribute::Mask,
attribute | MemoryAttribute::Locked);
}
ResultCode TransferMemory::Reset() {
auto& vm_manager{owner_process->VMManager()};
if (const auto result{vm_manager.CheckRangeState(
base_address, memory_size,
MemoryState::FlagTransfer | MemoryState::FlagMemoryPoolAllocated,
MemoryState::FlagTransfer | MemoryState::FlagMemoryPoolAllocated, VMAPermission::None,
VMAPermission::None, MemoryAttribute::Mask, MemoryAttribute::Locked,
MemoryAttribute::IpcAndDeviceMapped)};
result.Failed()) {
return result.Code();
}
if (const auto result{
vm_manager.ReprotectRange(base_address, memory_size, VMAPermission::ReadWrite)};
result.IsError()) {
return result;
}
return vm_manager.SetMemoryAttribute(base_address, memory_size, MemoryAttribute::Mask,
MemoryAttribute::None);
}
ResultCode TransferMemory::UnmapMemory(VAddr address, u64 size) {
if (memory_size != size) {
return ERR_INVALID_SIZE;

View File

@@ -11,6 +11,10 @@
union ResultCode;
namespace Memory {
class Memory;
}
namespace Kernel {
class KernelCore;
@@ -26,12 +30,13 @@ enum class MemoryPermission : u32;
///
class TransferMemory final : public Object {
public:
explicit TransferMemory(KernelCore& kernel);
explicit TransferMemory(KernelCore& kernel, Memory::Memory& memory);
~TransferMemory() override;
static constexpr HandleType HANDLE_TYPE = HandleType::TransferMemory;
static std::shared_ptr<TransferMemory> Create(KernelCore& kernel, VAddr base_address, u64 size,
static std::shared_ptr<TransferMemory> Create(KernelCore& kernel, Memory::Memory& memory,
VAddr base_address, u64 size,
MemoryPermission permissions);
TransferMemory(const TransferMemory&) = delete;
@@ -80,6 +85,14 @@ public:
///
ResultCode UnmapMemory(VAddr address, u64 size);
/// Reserves the region to be used for the transfer memory, called after the transfer memory is
/// created.
ResultCode Reserve();
/// Resets the region previously used for the transfer memory, called after the transfer memory
/// is closed.
ResultCode Reset();
private:
/// Memory block backing this instance.
std::shared_ptr<PhysicalMemory> backing_block;
@@ -98,6 +111,8 @@ private:
/// Whether or not this transfer memory instance has mapped memory.
bool is_mapped = false;
Memory::Memory& memory;
};
} // namespace Kernel

View File

@@ -544,7 +544,8 @@ MemoryInfo VMManager::QueryMemory(VAddr address) const {
ResultCode VMManager::SetMemoryAttribute(VAddr address, u64 size, MemoryAttribute mask,
MemoryAttribute attribute) {
constexpr auto ignore_mask = MemoryAttribute::Uncached | MemoryAttribute::DeviceMapped;
constexpr auto ignore_mask =
MemoryAttribute::Uncached | MemoryAttribute::DeviceMapped | MemoryAttribute::Locked;
constexpr auto attribute_mask = ~ignore_mask;
const auto result = CheckRangeState(

View File

@@ -98,6 +98,8 @@ enum class MemoryAttribute : u32 {
DeviceMapped = 4,
/// Uncached memory
Uncached = 8,
IpcAndDeviceMapped = LockedForIPC | DeviceMapped,
};
constexpr MemoryAttribute operator|(MemoryAttribute lhs, MemoryAttribute rhs) {
@@ -654,6 +656,35 @@ public:
/// is scheduled.
Common::PageTable page_table{Memory::PAGE_BITS};
using CheckResults = ResultVal<std::tuple<MemoryState, VMAPermission, MemoryAttribute>>;
/// Checks if an address range adheres to the specified state, permissions, and attributes.
///
/// @param address The starting address of the address range.
/// @param size The size of the address range.
/// @param state_mask The memory state mask.
/// @param state The state to compare the individual VMA states against,
/// which is done in the form of: (vma.state & state_mask) != state.
/// @param permission_mask The memory permissions mask.
/// @param permissions The permission to compare the individual VMA permissions against,
/// which is done in the form of:
/// (vma.permission & permission_mask) != permission.
/// @param attribute_mask The memory attribute mask.
/// @param attribute The memory attributes to compare the individual VMA attributes
/// against, which is done in the form of:
/// (vma.attributes & attribute_mask) != attribute.
/// @param ignore_mask The memory attributes to ignore during the check.
///
/// @returns If successful, returns a tuple containing the memory attributes
/// (with ignored bits specified by ignore_mask unset), memory permissions, and
/// memory state across the memory range.
/// @returns If not successful, returns ERR_INVALID_ADDRESS_STATE.
///
CheckResults CheckRangeState(VAddr address, u64 size, MemoryState state_mask, MemoryState state,
VMAPermission permission_mask, VMAPermission permissions,
MemoryAttribute attribute_mask, MemoryAttribute attribute,
MemoryAttribute ignore_mask) const;
private:
using VMAIter = VMAMap::iterator;
@@ -707,35 +738,6 @@ private:
/// Clears out the page table
void ClearPageTable();
using CheckResults = ResultVal<std::tuple<MemoryState, VMAPermission, MemoryAttribute>>;
/// Checks if an address range adheres to the specified state, permissions, and attributes.
///
/// @param address The starting address of the address range.
/// @param size The size of the address range.
/// @param state_mask The memory state mask.
/// @param state The state to compare the individual VMA states against,
/// which is done in the form of: (vma.state & state_mask) != state.
/// @param permission_mask The memory permissions mask.
/// @param permissions The permission to compare the individual VMA permissions against,
/// which is done in the form of:
/// (vma.permission & permission_mask) != permission.
/// @param attribute_mask The memory attribute mask.
/// @param attribute The memory attributes to compare the individual VMA attributes
/// against, which is done in the form of:
/// (vma.attributes & attribute_mask) != attribute.
/// @param ignore_mask The memory attributes to ignore during the check.
///
/// @returns If successful, returns a tuple containing the memory attributes
/// (with ignored bits specified by ignore_mask unset), memory permissions, and
/// memory state across the memory range.
/// @returns If not successful, returns ERR_INVALID_ADDRESS_STATE.
///
CheckResults CheckRangeState(VAddr address, u64 size, MemoryState state_mask, MemoryState state,
VMAPermission permission_mask, VMAPermission permissions,
MemoryAttribute attribute_mask, MemoryAttribute attribute,
MemoryAttribute ignore_mask) const;
/// Gets the amount of memory currently mapped (state != Unmapped) in a range.
ResultVal<std::size_t> SizeOfAllocatedVMAsInRange(VAddr address, std::size_t size) const;
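The relocated documentation above boils down to a mask-and-compare test applied across the range; a minimal sketch of that test, using placeholder names rather than the actual VMManager types, could read:

#include <cstdint>

// Hypothetical helper, not the VMManager implementation: a value passes the
// check only if, under the given mask, it equals the expected pattern.
constexpr bool MatchesUnderMask(std::uint32_t value, std::uint32_t mask, std::uint32_t expected) {
    return (value & mask) == expected;
}

// CheckRangeState applies this test to the state, permissions and attributes of
// the VMAs covering [address, address + size), returning ERR_INVALID_ADDRESS_STATE
// on any mismatch, per the documentation above.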

View File

@@ -7,7 +7,6 @@
#include "common/common_types.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/process.h"
@@ -51,17 +50,8 @@ std::shared_ptr<Thread> WaitObject::GetHighestPriorityReadyThread() const {
if (ShouldWait(thread.get()))
continue;
// A thread is ready to run if it's either in ThreadStatus::WaitSynch
// and the rest of the objects it is waiting on are ready.
bool ready_to_run = true;
if (thread_status == ThreadStatus::WaitSynch) {
ready_to_run = thread->AllWaitObjectsReady();
}
if (ready_to_run) {
candidate = thread.get();
candidate_priority = thread->GetPriority();
}
candidate = thread.get();
candidate_priority = thread->GetPriority();
}
return SharedFrom(candidate);
@@ -96,7 +86,7 @@ void WaitObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) {
}
if (resume) {
thread->ResumeFromWait();
Core::System::GetInstance().PrepareReschedule(thread->GetProcessorID());
kernel.PrepareReschedule(thread->GetProcessorID());
}
}

View File

@@ -709,8 +709,34 @@ void ICommonStateGetter::SetCpuBoostMode(Kernel::HLERequestContext& ctx) {
apm_sys->SetCpuBoostMode(ctx);
}
IStorage::IStorage(std::vector<u8> buffer)
: ServiceFramework("IStorage"), buffer(std::move(buffer)) {
IStorageImpl::~IStorageImpl() = default;
class StorageDataImpl final : public IStorageImpl {
public:
explicit StorageDataImpl(std::vector<u8>&& buffer) : buffer{std::move(buffer)} {}
std::vector<u8>& GetData() override {
return buffer;
}
const std::vector<u8>& GetData() const override {
return buffer;
}
std::size_t GetSize() const override {
return buffer.size();
}
private:
std::vector<u8> buffer;
};
IStorage::IStorage(std::vector<u8>&& buffer)
: ServiceFramework("IStorage"), impl{std::make_shared<StorageDataImpl>(std::move(buffer))} {
Register();
}
void IStorage::Register() {
// clang-format off
static const FunctionInfo functions[] = {
{0, &IStorage::Open, "Open"},
@@ -723,8 +749,13 @@ IStorage::IStorage(std::vector<u8> buffer)
IStorage::~IStorage() = default;
const std::vector<u8>& IStorage::GetData() const {
return buffer;
void IStorage::Open(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_AM, "called");
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
rb.PushIpcInterface<IStorageAccessor>(*this);
}
void ICommonStateGetter::GetOperationMode(Kernel::HLERequestContext& ctx) {
@@ -816,7 +847,7 @@ private:
LOG_DEBUG(Service_AM, "called");
IPC::RequestParser rp{ctx};
applet->GetBroker().PushNormalDataFromGame(*rp.PopIpcInterface<IStorage>());
applet->GetBroker().PushNormalDataFromGame(rp.PopIpcInterface<IStorage>());
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
@@ -825,26 +856,25 @@ private:
void PopOutData(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_AM, "called");
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
const auto storage = applet->GetBroker().PopNormalDataToGame();
if (storage == nullptr) {
LOG_ERROR(Service_AM,
"storage is a nullptr. There is no data in the current normal channel");
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ERR_NO_DATA_IN_CHANNEL);
return;
}
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
rb.PushIpcInterface<IStorage>(std::move(*storage));
rb.PushIpcInterface<IStorage>(std::move(storage));
}
void PushInteractiveInData(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_AM, "called");
IPC::RequestParser rp{ctx};
applet->GetBroker().PushInteractiveDataFromGame(*rp.PopIpcInterface<IStorage>());
applet->GetBroker().PushInteractiveDataFromGame(rp.PopIpcInterface<IStorage>());
ASSERT(applet->IsInitialized());
applet->ExecuteInteractive();
@@ -857,19 +887,18 @@ private:
void PopInteractiveOutData(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_AM, "called");
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
const auto storage = applet->GetBroker().PopInteractiveDataToGame();
if (storage == nullptr) {
LOG_ERROR(Service_AM,
"storage is a nullptr. There is no data in the current interactive channel");
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ERR_NO_DATA_IN_CHANNEL);
return;
}
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
rb.PushIpcInterface<IStorage>(std::move(*storage));
rb.PushIpcInterface<IStorage>(std::move(storage));
}
void GetPopOutDataEvent(Kernel::HLERequestContext& ctx) {
@@ -891,15 +920,6 @@ private:
std::shared_ptr<Applets::Applet> applet;
};
void IStorage::Open(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_AM, "called");
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
rb.PushIpcInterface<IStorageAccessor>(*this);
}
IStorageAccessor::IStorageAccessor(IStorage& storage)
: ServiceFramework("IStorageAccessor"), backing(storage) {
// clang-format off
@@ -921,7 +941,7 @@ void IStorageAccessor::GetSize(Kernel::HLERequestContext& ctx) {
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(RESULT_SUCCESS);
rb.Push(static_cast<u64>(backing.buffer.size()));
rb.Push(static_cast<u64>(backing.GetSize()));
}
void IStorageAccessor::Write(Kernel::HLERequestContext& ctx) {
@@ -932,17 +952,17 @@ void IStorageAccessor::Write(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, data.size());
if (data.size() > backing.buffer.size() - offset) {
if (data.size() > backing.GetSize() - offset) {
LOG_ERROR(Service_AM,
"offset is out of bounds, backing_buffer_sz={}, data_size={}, offset={}",
backing.buffer.size(), data.size(), offset);
backing.GetSize(), data.size(), offset);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ERR_SIZE_OUT_OF_BOUNDS);
return;
}
std::memcpy(backing.buffer.data() + offset, data.data(), data.size());
std::memcpy(backing.GetData().data() + offset, data.data(), data.size());
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
@@ -956,16 +976,16 @@ void IStorageAccessor::Read(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, size);
if (size > backing.buffer.size() - offset) {
if (size > backing.GetSize() - offset) {
LOG_ERROR(Service_AM, "offset is out of bounds, backing_buffer_sz={}, size={}, offset={}",
backing.buffer.size(), size, offset);
backing.GetSize(), size, offset);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ERR_SIZE_OUT_OF_BOUNDS);
return;
}
ctx.WriteBuffer(backing.buffer.data() + offset, size);
ctx.WriteBuffer(backing.GetData().data() + offset, size);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
@@ -1031,7 +1051,7 @@ void ILibraryAppletCreator::CreateTransferMemoryStorage(Kernel::HLERequestContex
rp.SetCurrentOffset(3);
const auto handle{rp.Pop<Kernel::Handle>()};
const auto transfer_mem =
auto transfer_mem =
system.CurrentProcess()->GetHandleTable().Get<Kernel::TransferMemory>(handle);
if (transfer_mem == nullptr) {
@@ -1047,7 +1067,7 @@ void ILibraryAppletCreator::CreateTransferMemoryStorage(Kernel::HLERequestContex
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
rb.PushIpcInterface(std::make_shared<IStorage>(std::move(memory)));
rb.PushIpcInterface<IStorage>(std::move(memory));
}
IApplicationFunctions::IApplicationFunctions(Core::System& system_)
@@ -1189,13 +1209,11 @@ void IApplicationFunctions::PopLaunchParameter(Kernel::HLERequestContext& ctx) {
u64 build_id{};
std::memcpy(&build_id, build_id_full.data(), sizeof(u64));
const auto data =
backend->GetLaunchParameter({system.CurrentProcess()->GetTitleID(), build_id});
auto data = backend->GetLaunchParameter({system.CurrentProcess()->GetTitleID(), build_id});
if (data.has_value()) {
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
rb.PushIpcInterface<AM::IStorage>(*data);
rb.PushIpcInterface<IStorage>(std::move(*data));
launch_popped_application_specific = true;
return;
}
@@ -1218,7 +1236,7 @@ void IApplicationFunctions::PopLaunchParameter(Kernel::HLERequestContext& ctx) {
std::vector<u8> buffer(sizeof(LaunchParameterAccountPreselectedUser));
std::memcpy(buffer.data(), &params, buffer.size());
rb.PushIpcInterface<AM::IStorage>(buffer);
rb.PushIpcInterface<IStorage>(std::move(buffer));
launch_popped_account_preselect = true;
return;
}

View File

@@ -12,7 +12,8 @@
namespace Kernel {
class KernelCore;
}
class TransferMemory;
} // namespace Kernel
namespace Service::NVFlinger {
class NVFlinger;
@@ -188,19 +189,36 @@ private:
std::shared_ptr<AppletMessageQueue> msg_queue;
};
class IStorageImpl {
public:
virtual ~IStorageImpl();
virtual std::vector<u8>& GetData() = 0;
virtual const std::vector<u8>& GetData() const = 0;
virtual std::size_t GetSize() const = 0;
};
class IStorage final : public ServiceFramework<IStorage> {
public:
explicit IStorage(std::vector<u8> buffer);
explicit IStorage(std::vector<u8>&& buffer);
~IStorage() override;
const std::vector<u8>& GetData() const;
std::vector<u8>& GetData() {
return impl->GetData();
}
const std::vector<u8>& GetData() const {
return impl->GetData();
}
std::size_t GetSize() const {
return impl->GetSize();
}
private:
void Register();
void Open(Kernel::HLERequestContext& ctx);
std::vector<u8> buffer;
friend class IStorageAccessor;
std::shared_ptr<IStorageImpl> impl;
};
class IStorageAccessor final : public ServiceFramework<IStorageAccessor> {

View File

@@ -50,16 +50,17 @@ AppletDataBroker::RawChannelData AppletDataBroker::PeekDataToAppletForDebug() co
return {std::move(out_normal), std::move(out_interactive)};
}
std::unique_ptr<IStorage> AppletDataBroker::PopNormalDataToGame() {
std::shared_ptr<IStorage> AppletDataBroker::PopNormalDataToGame() {
if (out_channel.empty())
return nullptr;
auto out = std::move(out_channel.front());
out_channel.pop_front();
pop_out_data_event.writable->Clear();
return out;
}
std::unique_ptr<IStorage> AppletDataBroker::PopNormalDataToApplet() {
std::shared_ptr<IStorage> AppletDataBroker::PopNormalDataToApplet() {
if (in_channel.empty())
return nullptr;
@@ -68,16 +69,17 @@ std::unique_ptr<IStorage> AppletDataBroker::PopNormalDataToApplet() {
return out;
}
std::unique_ptr<IStorage> AppletDataBroker::PopInteractiveDataToGame() {
std::shared_ptr<IStorage> AppletDataBroker::PopInteractiveDataToGame() {
if (out_interactive_channel.empty())
return nullptr;
auto out = std::move(out_interactive_channel.front());
out_interactive_channel.pop_front();
pop_interactive_out_data_event.writable->Clear();
return out;
}
std::unique_ptr<IStorage> AppletDataBroker::PopInteractiveDataToApplet() {
std::shared_ptr<IStorage> AppletDataBroker::PopInteractiveDataToApplet() {
if (in_interactive_channel.empty())
return nullptr;
@@ -86,21 +88,21 @@ std::unique_ptr<IStorage> AppletDataBroker::PopInteractiveDataToApplet() {
return out;
}
void AppletDataBroker::PushNormalDataFromGame(IStorage storage) {
in_channel.push_back(std::make_unique<IStorage>(storage));
void AppletDataBroker::PushNormalDataFromGame(std::shared_ptr<IStorage>&& storage) {
in_channel.emplace_back(std::move(storage));
}
void AppletDataBroker::PushNormalDataFromApplet(IStorage storage) {
out_channel.push_back(std::make_unique<IStorage>(storage));
void AppletDataBroker::PushNormalDataFromApplet(std::shared_ptr<IStorage>&& storage) {
out_channel.emplace_back(std::move(storage));
pop_out_data_event.writable->Signal();
}
void AppletDataBroker::PushInteractiveDataFromGame(IStorage storage) {
in_interactive_channel.push_back(std::make_unique<IStorage>(storage));
void AppletDataBroker::PushInteractiveDataFromGame(std::shared_ptr<IStorage>&& storage) {
in_interactive_channel.emplace_back(std::move(storage));
}
void AppletDataBroker::PushInteractiveDataFromApplet(IStorage storage) {
out_interactive_channel.push_back(std::make_unique<IStorage>(storage));
void AppletDataBroker::PushInteractiveDataFromApplet(std::shared_ptr<IStorage>&& storage) {
out_interactive_channel.emplace_back(std::move(storage));
pop_interactive_out_data_event.writable->Signal();
}

View File

@@ -72,17 +72,17 @@ public:
// Retrieves but does not pop the data sent to applet.
RawChannelData PeekDataToAppletForDebug() const;
std::unique_ptr<IStorage> PopNormalDataToGame();
std::unique_ptr<IStorage> PopNormalDataToApplet();
std::shared_ptr<IStorage> PopNormalDataToGame();
std::shared_ptr<IStorage> PopNormalDataToApplet();
std::unique_ptr<IStorage> PopInteractiveDataToGame();
std::unique_ptr<IStorage> PopInteractiveDataToApplet();
std::shared_ptr<IStorage> PopInteractiveDataToGame();
std::shared_ptr<IStorage> PopInteractiveDataToApplet();
void PushNormalDataFromGame(IStorage storage);
void PushNormalDataFromApplet(IStorage storage);
void PushNormalDataFromGame(std::shared_ptr<IStorage>&& storage);
void PushNormalDataFromApplet(std::shared_ptr<IStorage>&& storage);
void PushInteractiveDataFromGame(IStorage storage);
void PushInteractiveDataFromApplet(IStorage storage);
void PushInteractiveDataFromGame(std::shared_ptr<IStorage>&& storage);
void PushInteractiveDataFromApplet(std::shared_ptr<IStorage>&& storage);
void SignalStateChanged() const;
@@ -94,16 +94,16 @@ private:
// Queues are named from applet's perspective
// PopNormalDataToApplet and PushNormalDataFromGame
std::deque<std::unique_ptr<IStorage>> in_channel;
std::deque<std::shared_ptr<IStorage>> in_channel;
// PopNormalDataToGame and PushNormalDataFromApplet
std::deque<std::unique_ptr<IStorage>> out_channel;
std::deque<std::shared_ptr<IStorage>> out_channel;
// PopInteractiveDataToApplet and PushInteractiveDataFromGame
std::deque<std::unique_ptr<IStorage>> in_interactive_channel;
std::deque<std::shared_ptr<IStorage>> in_interactive_channel;
// PopInteractiveDataToGame and PushInteractiveDataFromApplet
std::deque<std::unique_ptr<IStorage>> out_interactive_channel;
std::deque<std::shared_ptr<IStorage>> out_interactive_channel;
Kernel::EventPair state_changed_event;

View File

@@ -186,7 +186,7 @@ void Error::Execute() {
void Error::DisplayCompleted() {
complete = true;
broker.PushNormalDataFromApplet(IStorage{{}});
broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::vector<u8>{}));
broker.SignalStateChanged();
}

View File

@@ -20,7 +20,7 @@ namespace Service::AM::Applets {
constexpr ResultCode ERROR_INVALID_PIN{ErrorModule::PCTL, 221};
static void LogCurrentStorage(AppletDataBroker& broker, std::string_view prefix) {
std::unique_ptr<IStorage> storage = broker.PopNormalDataToApplet();
std::shared_ptr<IStorage> storage = broker.PopNormalDataToApplet();
for (; storage != nullptr; storage = broker.PopNormalDataToApplet()) {
const auto data = storage->GetData();
LOG_INFO(Service_AM,
@@ -148,7 +148,7 @@ void Auth::AuthFinished(bool successful) {
std::vector<u8> out(sizeof(Return));
std::memcpy(out.data(), &return_, sizeof(Return));
broker.PushNormalDataFromApplet(IStorage{out});
broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(out)));
broker.SignalStateChanged();
}
@@ -198,7 +198,7 @@ void PhotoViewer::Execute() {
}
void PhotoViewer::ViewFinished() {
broker.PushNormalDataFromApplet(IStorage{{}});
broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::vector<u8>{}));
broker.SignalStateChanged();
}
@@ -234,8 +234,8 @@ void StubApplet::ExecuteInteractive() {
LOG_WARNING(Service_AM, "called (STUBBED)");
LogCurrentStorage(broker, "ExecuteInteractive");
broker.PushNormalDataFromApplet(IStorage{std::vector<u8>(0x1000)});
broker.PushInteractiveDataFromApplet(IStorage{std::vector<u8>(0x1000)});
broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::vector<u8>(0x1000)));
broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(std::vector<u8>(0x1000)));
broker.SignalStateChanged();
}
@@ -243,8 +243,8 @@ void StubApplet::Execute() {
LOG_WARNING(Service_AM, "called (STUBBED)");
LogCurrentStorage(broker, "Execute");
broker.PushNormalDataFromApplet(IStorage{std::vector<u8>(0x1000)});
broker.PushInteractiveDataFromApplet(IStorage{std::vector<u8>(0x1000)});
broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::vector<u8>(0x1000)));
broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(std::vector<u8>(0x1000)));
broker.SignalStateChanged();
}

View File

@@ -50,7 +50,7 @@ void ProfileSelect::ExecuteInteractive() {
void ProfileSelect::Execute() {
if (complete) {
broker.PushNormalDataFromApplet(IStorage{final_data});
broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(final_data)));
return;
}
@@ -71,7 +71,7 @@ void ProfileSelect::SelectionComplete(std::optional<Common::UUID> uuid) {
final_data = std::vector<u8>(sizeof(UserSelectionOutput));
std::memcpy(final_data.data(), &output, final_data.size());
broker.PushNormalDataFromApplet(IStorage{final_data});
broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(final_data)));
broker.SignalStateChanged();
}

View File

@@ -102,7 +102,8 @@ void SoftwareKeyboard::ExecuteInteractive() {
void SoftwareKeyboard::Execute() {
if (complete) {
broker.PushNormalDataFromApplet(IStorage{final_data});
broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(final_data)));
broker.SignalStateChanged();
return;
}
@@ -119,7 +120,7 @@ void SoftwareKeyboard::WriteText(std::optional<std::u16string> text) {
std::vector<u8> output_sub(SWKBD_OUTPUT_BUFFER_SIZE);
if (config.utf_8) {
const u64 size = text->size() + 8;
const u64 size = text->size() + sizeof(u64);
const auto new_text = Common::UTF16ToUTF8(*text);
std::memcpy(output_sub.data(), &size, sizeof(u64));
@@ -130,7 +131,7 @@ void SoftwareKeyboard::WriteText(std::optional<std::u16string> text) {
std::memcpy(output_main.data() + 4, new_text.data(),
std::min(new_text.size(), SWKBD_OUTPUT_BUFFER_SIZE - 4));
} else {
const u64 size = text->size() * 2 + 8;
const u64 size = text->size() * 2 + sizeof(u64);
std::memcpy(output_sub.data(), &size, sizeof(u64));
std::memcpy(output_sub.data() + 8, text->data(),
std::min(text->size() * 2, SWKBD_OUTPUT_BUFFER_SIZE - 8));
@@ -144,15 +145,15 @@ void SoftwareKeyboard::WriteText(std::optional<std::u16string> text) {
final_data = output_main;
if (complete) {
broker.PushNormalDataFromApplet(IStorage{output_main});
broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(output_main)));
broker.SignalStateChanged();
} else {
broker.PushInteractiveDataFromApplet(IStorage{output_sub});
broker.PushInteractiveDataFromApplet(std::make_shared<IStorage>(std::move(output_sub)));
}
} else {
output_main[0] = 1;
complete = true;
broker.PushNormalDataFromApplet(IStorage{output_main});
broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(output_main)));
broker.SignalStateChanged();
}
}
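The sizeof(u64) substitutions above simply spell out the 8-byte length prefix that is memcpy'd to offset 0 of output_sub: for example, a 5-character UTF-16 result would report size = 5 * 2 + sizeof(u64) = 18 bytes, with the text itself copied starting at offset 8.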

View File

@@ -284,7 +284,7 @@ void WebBrowser::Finalize() {
std::vector<u8> data(sizeof(WebCommonReturnValue));
std::memcpy(data.data(), &out, sizeof(WebCommonReturnValue));
broker.PushNormalDataFromApplet(IStorage{data});
broker.PushNormalDataFromApplet(std::make_shared<IStorage>(std::move(data)));
broker.SignalStateChanged();
if (!temporary_dir.empty() && FileUtil::IsDirectory(temporary_dir)) {

View File

@@ -117,13 +117,13 @@ bool NullBackend::SynchronizeDirectory(TitleIDVersion title, std::string name,
}
bool NullBackend::Clear(u64 title_id) {
LOG_DEBUG(Service_BCAT, "called, title_id={:016X}");
LOG_DEBUG(Service_BCAT, "called, title_id={:016X}", title_id);
return true;
}
void NullBackend::SetPassphrase(u64 title_id, const Passphrase& passphrase) {
LOG_DEBUG(Service_BCAT, "called, title_id={:016X}, passphrase = {}", title_id,
LOG_DEBUG(Service_BCAT, "called, title_id={:016X}, passphrase={}", title_id,
Common::HexToString(passphrase));
}

View File

@@ -420,7 +420,7 @@ public:
return;
}
IFile file(result.Unwrap());
auto file = std::make_shared<IFile>(result.Unwrap());
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
@@ -445,7 +445,7 @@ public:
return;
}
IDirectory directory(result.Unwrap());
auto directory = std::make_shared<IDirectory>(result.Unwrap());
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
@@ -794,8 +794,8 @@ void FSP_SRV::OpenFileSystemWithPatch(Kernel::HLERequestContext& ctx) {
void FSP_SRV::OpenSdCardFileSystem(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_FS, "called");
IFileSystem filesystem(fsc.OpenSDMC().Unwrap(),
SizeGetter::FromStorageId(fsc, FileSys::StorageId::SdCard));
auto filesystem = std::make_shared<IFileSystem>(
fsc.OpenSDMC().Unwrap(), SizeGetter::FromStorageId(fsc, FileSys::StorageId::SdCard));
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
@@ -846,7 +846,8 @@ void FSP_SRV::OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx) {
id = FileSys::StorageId::NandSystem;
}
IFileSystem filesystem(std::move(dir.Unwrap()), SizeGetter::FromStorageId(fsc, id));
auto filesystem =
std::make_shared<IFileSystem>(std::move(dir.Unwrap()), SizeGetter::FromStorageId(fsc, id));
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
@@ -898,7 +899,7 @@ void FSP_SRV::OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx) {
return;
}
IStorage storage(std::move(romfs.Unwrap()));
auto storage = std::make_shared<IStorage>(std::move(romfs.Unwrap()));
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
@@ -937,7 +938,8 @@ void FSP_SRV::OpenDataStorageByDataId(Kernel::HLERequestContext& ctx) {
FileSys::PatchManager pm{title_id};
IStorage storage(pm.PatchRomFS(std::move(data.Unwrap()), 0, FileSys::ContentRecordType::Data));
auto storage = std::make_shared<IStorage>(
pm.PatchRomFS(std::move(data.Unwrap()), 0, FileSys::ContentRecordType::Data));
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);

View File

@@ -250,6 +250,10 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) {
auto& rstick_entry = npad_pad_states[controller_idx].r_stick;
const auto& button_state = buttons[controller_idx];
const auto& analog_state = sticks[controller_idx];
const auto [stick_l_x_f, stick_l_y_f] =
analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetStatus();
const auto [stick_r_x_f, stick_r_y_f] =
analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]->GetStatus();
using namespace Settings::NativeButton;
pad_state.a.Assign(button_state[A - BUTTON_HID_BEGIN]->GetStatus());
@@ -270,23 +274,32 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) {
pad_state.d_right.Assign(button_state[DRight - BUTTON_HID_BEGIN]->GetStatus());
pad_state.d_down.Assign(button_state[DDown - BUTTON_HID_BEGIN]->GetStatus());
pad_state.l_stick_left.Assign(button_state[LStick_Left - BUTTON_HID_BEGIN]->GetStatus());
pad_state.l_stick_up.Assign(button_state[LStick_Up - BUTTON_HID_BEGIN]->GetStatus());
pad_state.l_stick_right.Assign(button_state[LStick_Right - BUTTON_HID_BEGIN]->GetStatus());
pad_state.l_stick_down.Assign(button_state[LStick_Down - BUTTON_HID_BEGIN]->GetStatus());
pad_state.l_stick_right.Assign(
analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus(
Input::AnalogDirection::RIGHT));
pad_state.l_stick_left.Assign(
analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus(
Input::AnalogDirection::LEFT));
pad_state.l_stick_up.Assign(
analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus(
Input::AnalogDirection::UP));
pad_state.l_stick_down.Assign(
analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus(
Input::AnalogDirection::DOWN));
pad_state.r_stick_left.Assign(button_state[RStick_Left - BUTTON_HID_BEGIN]->GetStatus());
pad_state.r_stick_up.Assign(button_state[RStick_Up - BUTTON_HID_BEGIN]->GetStatus());
pad_state.r_stick_right.Assign(button_state[RStick_Right - BUTTON_HID_BEGIN]->GetStatus());
pad_state.r_stick_down.Assign(button_state[RStick_Down - BUTTON_HID_BEGIN]->GetStatus());
pad_state.r_stick_up.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]
->GetAnalogDirectionStatus(Input::AnalogDirection::UP));
pad_state.r_stick_left.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]
->GetAnalogDirectionStatus(Input::AnalogDirection::LEFT));
pad_state.r_stick_right.Assign(
analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]
->GetAnalogDirectionStatus(Input::AnalogDirection::RIGHT));
pad_state.r_stick_down.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]
->GetAnalogDirectionStatus(Input::AnalogDirection::DOWN));
pad_state.left_sl.Assign(button_state[SL - BUTTON_HID_BEGIN]->GetStatus());
pad_state.left_sr.Assign(button_state[SR - BUTTON_HID_BEGIN]->GetStatus());
const auto [stick_l_x_f, stick_l_y_f] =
analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetStatus();
const auto [stick_r_x_f, stick_r_y_f] =
analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]->GetStatus();
lstick_entry.x = static_cast<s32>(stick_l_x_f * HID_JOYSTICK_MAX);
lstick_entry.y = static_cast<s32>(stick_l_y_f * HID_JOYSTICK_MAX);
rstick_entry.x = static_cast<s32>(stick_r_x_f * HID_JOYSTICK_MAX);

View File

@@ -50,16 +50,16 @@ private:
IPC::RequestParser rp{ctx};
const auto process_id = rp.PopRaw<u64>();
const auto data1 = ctx.ReadBuffer(0);
const auto data2 = ctx.ReadBuffer(1);
std::vector<std::vector<u8>> data{ctx.ReadBuffer(0)};
if (Type == Core::Reporter::PlayReportType::New) {
data.emplace_back(ctx.ReadBuffer(1));
}
LOG_DEBUG(Service_PREPO,
"called, type={:02X}, process_id={:016X}, data1_size={:016X}, data2_size={:016X}",
static_cast<u8>(Type), process_id, data1.size(), data2.size());
LOG_DEBUG(Service_PREPO, "called, type={:02X}, process_id={:016X}, data1_size={:016X}",
static_cast<u8>(Type), process_id, data[0].size());
const auto& reporter{system.GetReporter()};
reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), {data1, data2},
process_id);
reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), data, process_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
@@ -70,19 +70,19 @@ private:
IPC::RequestParser rp{ctx};
const auto user_id = rp.PopRaw<u128>();
const auto process_id = rp.PopRaw<u64>();
const auto data1 = ctx.ReadBuffer(0);
const auto data2 = ctx.ReadBuffer(1);
std::vector<std::vector<u8>> data{ctx.ReadBuffer(0)};
if (Type == Core::Reporter::PlayReportType::New) {
data.emplace_back(ctx.ReadBuffer(1));
}
LOG_DEBUG(
Service_PREPO,
"called, type={:02X}, user_id={:016X}{:016X}, process_id={:016X}, data1_size={:016X}, "
"data2_size={:016X}",
static_cast<u8>(Type), user_id[1], user_id[0], process_id, data1.size(), data2.size());
"called, type={:02X}, user_id={:016X}{:016X}, process_id={:016X}, data1_size={:016X}",
static_cast<u8>(Type), user_id[1], user_id[0], process_id, data[0].size());
const auto& reporter{system.GetReporter()};
reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), {data1, data2},
process_id, user_id);
reporter.SavePlayReport(Type, system.CurrentProcess()->GetTitleID(), data, process_id,
user_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
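The reworked SaveReport/SaveReportWithUser handlers above read the second transfer buffer only for PlayReportType::New, presumably because the older report variants supply a single input buffer; unconditionally calling ctx.ReadBuffer(1) there would trip the descriptor bounds assertions added to hle_ipc earlier in this comparison.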

View File

@@ -42,6 +42,26 @@ void BSD::Socket(Kernel::HLERequestContext& ctx) {
rb.Push<u32>(0); // bsd errno
}
void BSD::Select(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(RESULT_SUCCESS);
rb.Push<u32>(0); // ret
rb.Push<u32>(0); // bsd errno
}
void BSD::Bind(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(RESULT_SUCCESS);
rb.Push<u32>(0); // ret
rb.Push<u32>(0); // bsd errno
}
void BSD::Connect(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service, "(STUBBED) called");
@@ -52,6 +72,26 @@ void BSD::Connect(Kernel::HLERequestContext& ctx) {
rb.Push<u32>(0); // bsd errno
}
void BSD::Listen(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(RESULT_SUCCESS);
rb.Push<u32>(0); // ret
rb.Push<u32>(0); // bsd errno
}
void BSD::SetSockOpt(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(RESULT_SUCCESS);
rb.Push<u32>(0); // ret
rb.Push<u32>(0); // bsd errno
}
void BSD::SendTo(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service, "(STUBBED) called");
@@ -80,7 +120,7 @@ BSD::BSD(const char* name) : ServiceFramework(name) {
{2, &BSD::Socket, "Socket"},
{3, nullptr, "SocketExempt"},
{4, nullptr, "Open"},
{5, nullptr, "Select"},
{5, &BSD::Select, "Select"},
{6, nullptr, "Poll"},
{7, nullptr, "Sysctl"},
{8, nullptr, "Recv"},
@@ -88,15 +128,15 @@ BSD::BSD(const char* name) : ServiceFramework(name) {
{10, nullptr, "Send"},
{11, &BSD::SendTo, "SendTo"},
{12, nullptr, "Accept"},
{13, nullptr, "Bind"},
{13, &BSD::Bind, "Bind"},
{14, &BSD::Connect, "Connect"},
{15, nullptr, "GetPeerName"},
{16, nullptr, "GetSockName"},
{17, nullptr, "GetSockOpt"},
{18, nullptr, "Listen"},
{18, &BSD::Listen, "Listen"},
{19, nullptr, "Ioctl"},
{20, nullptr, "Fcntl"},
{21, nullptr, "SetSockOpt"},
{21, &BSD::SetSockOpt, "SetSockOpt"},
{22, nullptr, "Shutdown"},
{23, nullptr, "ShutdownAllSockets"},
{24, nullptr, "Write"},

View File

@@ -18,7 +18,11 @@ private:
void RegisterClient(Kernel::HLERequestContext& ctx);
void StartMonitoring(Kernel::HLERequestContext& ctx);
void Socket(Kernel::HLERequestContext& ctx);
void Select(Kernel::HLERequestContext& ctx);
void Bind(Kernel::HLERequestContext& ctx);
void Connect(Kernel::HLERequestContext& ctx);
void Listen(Kernel::HLERequestContext& ctx);
void SetSockOpt(Kernel::HLERequestContext& ctx);
void SendTo(Kernel::HLERequestContext& ctx);
void Close(Kernel::HLERequestContext& ctx);

View File

@@ -371,6 +371,11 @@ enum class SDMCSize : u64 {
S1TB = 0x10000000000ULL,
};
enum class RendererBackend {
OpenGL = 0,
Vulkan = 1,
};
struct Values {
// System
bool use_docked_mode;
@@ -419,6 +424,10 @@ struct Values {
SDMCSize sdmc_size;
// Renderer
RendererBackend renderer_backend;
bool renderer_debug;
int vulkan_device;
float resolution_factor;
bool use_frame_limit;
u16 frame_limit;

View File

@@ -46,6 +46,16 @@ static u64 GenerateTelemetryId() {
return telemetry_id;
}
static const char* TranslateRenderer(Settings::RendererBackend backend) {
switch (backend) {
case Settings::RendererBackend::OpenGL:
return "OpenGL";
case Settings::RendererBackend::Vulkan:
return "Vulkan";
}
return "Unknown";
}
u64 GetTelemetryId() {
u64 telemetry_id{};
const std::string filename{FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) +
@@ -169,7 +179,7 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
AddField(field_type, "Audio_SinkId", Settings::values.sink_id);
AddField(field_type, "Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
AddField(field_type, "Core_UseMultiCore", Settings::values.use_multi_core);
AddField(field_type, "Renderer_Backend", "OpenGL");
AddField(field_type, "Renderer_Backend", TranslateRenderer(Settings::values.renderer_backend));
AddField(field_type, "Renderer_ResolutionFactor", Settings::values.resolution_factor);
AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit);
AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit);

View File

@@ -41,6 +41,7 @@ void Shutdown() {
Input::UnregisterFactory<Input::MotionDevice>("motion_emu");
motion_emu.reset();
sdl.reset();
udp.reset();
}
Keyboard* GetKeyboard() {

View File

@@ -342,6 +342,22 @@ public:
return std::make_tuple<float, float>(0.0f, 0.0f);
}
bool GetAnalogDirectionStatus(Input::AnalogDirection direction) const override {
const auto [x, y] = GetStatus();
const float directional_deadzone = 0.4f;
switch (direction) {
case Input::AnalogDirection::RIGHT:
return x > directional_deadzone;
case Input::AnalogDirection::LEFT:
return x < -directional_deadzone;
case Input::AnalogDirection::UP:
return y > directional_deadzone;
case Input::AnalogDirection::DOWN:
return y < -directional_deadzone;
}
return false;
}
private:
std::shared_ptr<SDLJoystick> joystick;
const int axis_x;
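As a concrete example of the directional deadzone above: a stick reading of (x, y) = (0.55, 0.10) reports AnalogDirection::RIGHT as pressed but neither UP nor DOWN, since only components whose magnitude exceeds 0.4 count.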

View File

@@ -14,7 +14,6 @@
#include "input_common/udp/client.h"
#include "input_common/udp/protocol.h"
using boost::asio::ip::address_v4;
using boost::asio::ip::udp;
namespace InputCommon::CemuhookUDP {
@@ -31,10 +30,10 @@ public:
explicit Socket(const std::string& host, u16 port, u8 pad_index, u32 client_id,
SocketCallback callback)
: client_id(client_id), timer(io_service),
send_endpoint(udp::endpoint(address_v4::from_string(host), port)),
socket(io_service, udp::endpoint(udp::v4(), 0)), pad_index(pad_index),
callback(std::move(callback)) {}
: callback(std::move(callback)), timer(io_service),
socket(io_service, udp::endpoint(udp::v4(), 0)), client_id(client_id),
pad_index(pad_index),
send_endpoint(udp::endpoint(boost::asio::ip::make_address_v4(host), port)) {}
void Stop() {
io_service.stop();
@@ -126,7 +125,7 @@ static void SocketLoop(Socket* socket) {
Client::Client(std::shared_ptr<DeviceStatus> status, const std::string& host, u16 port,
u8 pad_index, u32 client_id)
: status(status) {
: status(std::move(status)) {
StartCommunication(host, port, pad_index, client_id);
}
@@ -207,7 +206,7 @@ void TestCommunication(const std::string& host, u16 port, u8 pad_index, u32 clie
Common::Event success_event;
SocketCallback callback{[](Response::Version version) {}, [](Response::PortInfo info) {},
[&](Response::PadData data) { success_event.Set(); }};
Socket socket{host, port, pad_index, client_id, callback};
Socket socket{host, port, pad_index, client_id, std::move(callback)};
std::thread worker_thread{SocketLoop, &socket};
bool result = success_event.WaitFor(std::chrono::seconds(8));
socket.Stop();
@@ -267,7 +266,7 @@ CalibrationConfigurationJob::CalibrationConfigurationJob(
complete_event.Set();
}
}};
Socket socket{host, port, pad_index, client_id, callback};
Socket socket{host, port, pad_index, client_id, std::move(callback)};
std::thread worker_thread{SocketLoop, &socket};
complete_event.Wait();
socket.Stop();

View File

@@ -11,7 +11,6 @@
#include <string>
#include <thread>
#include <tuple>
#include <vector>
#include "common/common_types.h"
#include "common/thread.h"
#include "common/vector_math.h"

View File

@@ -7,7 +7,6 @@
#include <array>
#include <optional>
#include <type_traits>
#include <vector>
#include <boost/crc.hpp>
#include "common/bit_field.h"
#include "common/swap.h"

View File

@@ -2,7 +2,9 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/logging/log.h"
#include <mutex>
#include <tuple>
#include "common/param_package.h"
#include "core/frontend/input.h"
#include "core/settings.h"
@@ -14,7 +16,7 @@ namespace InputCommon::CemuhookUDP {
class UDPTouchDevice final : public Input::TouchDevice {
public:
explicit UDPTouchDevice(std::shared_ptr<DeviceStatus> status_) : status(std::move(status_)) {}
std::tuple<float, float, bool> GetStatus() const {
std::tuple<float, float, bool> GetStatus() const override {
std::lock_guard guard(status->update_mutex);
return status->touch_status;
}
@@ -26,7 +28,7 @@ private:
class UDPMotionDevice final : public Input::MotionDevice {
public:
explicit UDPMotionDevice(std::shared_ptr<DeviceStatus> status_) : status(std::move(status_)) {}
std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const {
std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const override {
std::lock_guard guard(status->update_mutex);
return status->motion_status;
}

View File

@@ -2,15 +2,13 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <unordered_map>
#include "input_common/main.h"
#include "input_common/udp/client.h"
namespace InputCommon::CemuhookUDP {
class UDPTouchDevice;
class UDPMotionDevice;
class Client;
class State {
public:

View File

@@ -29,6 +29,8 @@ add_library(video_core STATIC
gpu_synch.h
gpu_thread.cpp
gpu_thread.h
guest_driver.cpp
guest_driver.h
macro_interpreter.cpp
macro_interpreter.h
memory_manager.cpp
@@ -154,6 +156,7 @@ if (ENABLE_VULKAN)
renderer_vulkan/maxwell_to_vk.cpp
renderer_vulkan/maxwell_to_vk.h
renderer_vulkan/renderer_vulkan.h
renderer_vulkan/renderer_vulkan.cpp
renderer_vulkan/vk_blit_screen.cpp
renderer_vulkan/vk_blit_screen.h
renderer_vulkan/vk_buffer_cache.cpp

View File

@@ -101,7 +101,10 @@ public:
void TickFrame() {
++epoch;
while (!pending_destruction.empty()) {
if (pending_destruction.front()->GetEpoch() + 1 > epoch) {
// Delay at least 4 frames before destruction.
// This is due to triple buffering happening on some drivers.
static constexpr u64 epochs_to_destroy = 5;
if (pending_destruction.front()->GetEpoch() + epochs_to_destroy > epoch) {
break;
}
pending_destruction.pop_front();
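To make the epoch arithmetic above concrete: with epochs_to_destroy = 5, a resource whose GetEpoch() is E stays in pending_destruction until the counter reaches E + 5, so it is released on the fifth TickFrame() after retirement and survives at least four full frames, matching the "at least 4 frames" wording in the comment.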

View File

@@ -9,6 +9,7 @@
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_type.h"
#include "video_core/guest_driver.h"
#include "video_core/textures/texture.h"
namespace Tegra::Engines {
@@ -106,6 +107,9 @@ public:
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const = 0;
virtual u32 GetBoundBuffer() const = 0;
virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0;
};
} // namespace Tegra::Engines

View File

@@ -94,6 +94,14 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
return result;
}
VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
return rasterizer.AccessGuestDriverProfile();
}
const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const {
return rasterizer.AccessGuestDriverProfile();
}
void KeplerCompute::ProcessLaunch() {
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,

View File

@@ -218,6 +218,10 @@ public:
return regs.tex_cb_index;
}
VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
private:
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;

View File

@@ -784,4 +784,12 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
return result;
}
VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
return rasterizer.AccessGuestDriverProfile();
}
const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
return rasterizer.AccessGuestDriverProfile();
}
} // namespace Tegra::Engines

View File

@@ -704,8 +704,8 @@ public:
INSERT_UNION_PADDING_WORDS(0x15);
s32 stencil_back_func_ref;
u32 stencil_back_func_mask;
u32 stencil_back_mask;
u32 stencil_back_func_mask;
INSERT_UNION_PADDING_WORDS(0xC);
@@ -862,7 +862,11 @@ public:
float point_size;
INSERT_UNION_PADDING_WORDS(0x7);
INSERT_UNION_PADDING_WORDS(0x1);
u32 point_sprite_enable;
INSERT_UNION_PADDING_WORDS(0x5);
u32 zeta_enable;
@@ -1306,6 +1310,10 @@ public:
return regs.tex_cb_index;
}
VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
/// Memory for macro code. It's undetermined how big this is; however, 1MB is much larger than
/// we've seen used.
using MacroMemory = std::array<u32, 0x40000>;
@@ -1454,8 +1462,8 @@ ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
ASSERT_REG_POSITION(patch_vertices, 0x373);
ASSERT_REG_POSITION(scissor_test, 0x380);
ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D6);
ASSERT_REG_POSITION(stencil_back_mask, 0x3D7);
ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
ASSERT_REG_POSITION(color_mask_common, 0x3E4);
ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
ASSERT_REG_POSITION(depth_bounds, 0x3E7);
@@ -1490,6 +1498,7 @@ ASSERT_REG_POSITION(vb_element_base, 0x50D);
ASSERT_REG_POSITION(vb_base_instance, 0x50E);
ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
ASSERT_REG_POSITION(point_size, 0x546);
ASSERT_REG_POSITION(point_sprite_enable, 0x548);
ASSERT_REG_POSITION(zeta_enable, 0x54E);
ASSERT_REG_POSITION(multisample_control, 0x54F);
ASSERT_REG_POSITION(condition, 0x554);

View File

@@ -227,6 +227,28 @@ enum class AtomicOp : u64 {
Exch = 8,
};
enum class GlobalAtomicOp : u64 {
Add = 0,
Min = 1,
Max = 2,
Inc = 3,
Dec = 4,
And = 5,
Or = 6,
Xor = 7,
Exch = 8,
SafeAdd = 10,
};
enum class GlobalAtomicType : u64 {
U32 = 0,
S32 = 1,
U64 = 2,
F32_FTZ_RN = 3,
F16x2_FTZ_RN = 4,
S64 = 5,
};
enum class UniformType : u64 {
UnsignedByte = 0,
SignedByte = 1,
@@ -602,6 +624,19 @@ enum class ShuffleOperation : u64 {
Bfly = 3, // shuffleXorNV
};
enum class ShfType : u64 {
Bits32 = 0,
U64 = 2,
S64 = 3,
};
enum class ShfXmode : u64 {
None = 0,
HI = 1,
X = 2,
XHI = 3,
};
union Instruction {
constexpr Instruction& operator=(const Instruction& instr) {
value = instr.value;
@@ -753,6 +788,13 @@ union Instruction {
BitField<39, 1, u64> wrap;
} shr;
union {
BitField<37, 2, ShfType> type;
BitField<48, 2, ShfXmode> xmode;
BitField<50, 1, u64> wrap;
BitField<20, 6, u64> immediate;
} shf;
union {
BitField<39, 5, u64> shift_amount;
BitField<48, 1, u64> negate_b;
@@ -957,6 +999,12 @@ union Instruction {
BitField<46, 2, u64> cache_mode;
} stg;
union {
BitField<52, 4, GlobalAtomicOp> operation;
BitField<49, 3, GlobalAtomicType> type;
BitField<28, 20, s64> offset;
} atom;
union {
BitField<52, 4, AtomicOp> operation;
BitField<28, 2, AtomicType> type;
@@ -1095,6 +1143,11 @@ union Instruction {
BitField<55, 1, u64> ftz;
} fset;
union {
BitField<47, 1, u64> ftz;
BitField<48, 4, PredCondition> cond;
} fcmp;
union {
BitField<49, 1, u64> bf;
BitField<35, 3, PredCondition> cond;
@@ -1675,6 +1728,7 @@ public:
BFE_C,
BFE_R,
BFE_IMM,
BFI_RC,
BFI_IMM_R,
BRA,
BRX,
@@ -1690,6 +1744,7 @@ public:
ST_S,
ST, // Store in generic memory
STG, // Store in global memory
ATOM, // Atomic operation on global memory
ATOMS, // Atomic operation on shared memory
AL2P, // Transforms attribute memory into physical memory
TEX,
@@ -1771,6 +1826,7 @@ public:
ICMP_R,
ICMP_CR,
ICMP_IMM,
FCMP_R,
MUFU, // Multi-Function Operator
RRO_C, // Range Reduction Operator
RRO_R,
@@ -1994,6 +2050,7 @@ private:
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
INST("101-------------", Id::ST, Type::Memory, "ST"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
@@ -2074,6 +2131,7 @@ private:
INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
INST("010110111010----", Id::FCMP_R, Type::Arithmetic, "FCMP_R"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
@@ -2098,6 +2156,7 @@ private:
INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"),
INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"),
INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"),
INST("0101001111110---", Id::BFI_RC, Type::Bfi, "BFI_RC"),
INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"),
INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),

View File

@@ -23,7 +23,7 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
: system{system}, renderer{renderer}, is_async{is_async} {
auto& rasterizer{renderer.Rasterizer()};
memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
memory_manager = std::make_unique<Tegra::MemoryManager>(system);
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer);

View File

@@ -86,7 +86,7 @@ struct CommandDataContainer {
struct SynchState final {
std::atomic_bool is_running{true};
using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
using CommandQueue = Common::MPSCQueue<CommandDataContainer>;
CommandQueue queue;
u64 last_fence{};
std::atomic<u64> signaled_fence{};

View File

@@ -0,0 +1,36 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <limits>
#include "video_core/guest_driver.h"
namespace VideoCore {
void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) {
if (texture_handler_size_deduced) {
return;
}
const std::size_t size = bound_offsets.size();
if (size < 2) {
return;
}
std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{});
u32 min_val = std::numeric_limits<u32>::max();
for (std::size_t i = 1; i < size; ++i) {
if (bound_offsets[i] == bound_offsets[i - 1]) {
continue;
}
const u32 new_min = bound_offsets[i] - bound_offsets[i - 1];
min_val = std::min(min_val, new_min);
}
if (min_val > 2) {
return;
}
texture_handler_size_deduced = true;
texture_handler_size = min_texture_handler_size * min_val;
}
} // namespace VideoCore

View File

@@ -0,0 +1,41 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vector>
#include "common/common_types.h"
namespace VideoCore {
/**
* The GuestDriverProfile class is used to learn about the guest GPU driver's behavior and to
* collect information necessary for HLE methods that are impossible to avoid, like shader
* tracking, since such problems are effectively undecidable (Entscheidungsproblems).
*/
class GuestDriverProfile {
public:
void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets);
u32 GetTextureHandlerSize() const {
return texture_handler_size;
}
bool TextureHandlerSizeKnown() const {
return texture_handler_size_deduced;
}
private:
// Minimum size of texture handler any driver can use.
static constexpr u32 min_texture_handler_size = 4;
// The default of 8 bytes follows the Vulkan and OpenGL standards, but Nvidia GPUs can easily
// use 4 bytes instead. Thus, certain drivers may shrink the size.
static constexpr u32 default_texture_handler_size = 8;
u32 texture_handler_size = default_texture_handler_size;
bool texture_handler_size_deduced = false;
};
} // namespace VideoCore
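// For illustration (hypothetical word offsets, not from any real shader), the deduction in
// guest_driver.cpp boils down to: the smallest gap between the sorted, deduplicated sampler
// offsets picks the handler size, and any gap wider than two words keeps the 8-byte default.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <vector>

std::uint32_t DeduceHandlerSizeSketch(std::vector<std::uint32_t> word_offsets) {
    constexpr std::uint32_t min_texture_handler_size = 4;     // one 32-bit word
    constexpr std::uint32_t default_texture_handler_size = 8; // Vulkan/OpenGL convention
    std::sort(word_offsets.begin(), word_offsets.end());
    std::uint32_t min_gap = std::numeric_limits<std::uint32_t>::max();
    for (std::size_t i = 1; i < word_offsets.size(); ++i) {
        if (word_offsets[i] != word_offsets[i - 1]) {
            min_gap = std::min(min_gap, word_offsets[i] - word_offsets[i - 1]);
        }
    }
    return min_gap <= 2 ? min_texture_handler_size * min_gap : default_texture_handler_size;
}
// Example: offsets {0x48, 0x4A, 0x4C} deduce 8 bytes (gap of 2 words); {0x48, 0x49} deduce 4.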

View File

@@ -9,13 +9,12 @@
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/vm_manager.h"
#include "core/memory.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
namespace Tegra {
MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
: rasterizer{rasterizer}, system{system} {
MemoryManager::MemoryManager(Core::System& system) : system{system} {
std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
std::fill(page_table.attributes.begin(), page_table.attributes.end(),
Common::PageType::Unmapped);
@@ -84,7 +83,8 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
const auto cpu_addr = GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size);
system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size);
UnmapRange(gpu_addr, aligned_size);
ASSERT(system.CurrentProcess()
->VMManager()
@@ -242,7 +242,7 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
switch (page_table.attributes[page_index]) {
case Common::PageType::Memory: {
const u8* src_ptr{page_table.pointers[page_index] + page_offset};
rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
system.GPU().FlushRegion(ToCacheAddr(src_ptr), copy_amount);
std::memcpy(dest_buffer, src_ptr, copy_amount);
break;
}
@@ -292,7 +292,7 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
switch (page_table.attributes[page_index]) {
case Common::PageType::Memory: {
u8* dest_ptr{page_table.pointers[page_index] + page_offset};
rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount);
system.GPU().InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount);
std::memcpy(dest_ptr, src_buffer, copy_amount);
break;
}
@@ -340,7 +340,7 @@ void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::
switch (page_table.attributes[page_index]) {
case Common::PageType::Memory: {
const u8* src_ptr{page_table.pointers[page_index] + page_offset};
rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
system.GPU().FlushRegion(ToCacheAddr(src_ptr), copy_amount);
WriteBlock(dest_addr, src_ptr, copy_amount);
break;
}

View File

@@ -10,10 +10,6 @@
#include "common/common_types.h"
#include "common/page_table.h"
namespace VideoCore {
class RasterizerInterface;
}
namespace Core {
class System;
}
@@ -51,7 +47,7 @@ struct VirtualMemoryArea {
class MemoryManager final {
public:
explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
explicit MemoryManager(Core::System& system);
~MemoryManager();
GPUVAddr AllocateSpace(u64 size, u64 align);
@@ -176,7 +172,6 @@ private:
Common::PageTable page_table{page_bits};
VMAMap vma_map;
VideoCore::RasterizerInterface& rasterizer;
Core::System& system;
};

View File

@@ -9,6 +9,7 @@
#include "common/common_types.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/gpu.h"
#include "video_core/guest_driver.h"
namespace Tegra {
class MemoryManager;
@@ -78,5 +79,18 @@ public:
/// Initialize disk cached resources for the game being emulated
virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
const DiskResourceLoadCallback& callback = {}) {}
/// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
GuestDriverProfile& AccessGuestDriverProfile() {
return guest_driver_profile;
}
/// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
const GuestDriverProfile& AccessGuestDriverProfile() const {
return guest_driver_profile;
}
private:
GuestDriverProfile guest_driver_profile{};
};
} // namespace VideoCore

View File

@@ -55,16 +55,20 @@ namespace {
template <typename Engine, typename Entry>
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
Tegra::Engines::ShaderType shader_type) {
Tegra::Engines::ShaderType shader_type,
std::size_t index = 0) {
if (entry.IsBindless()) {
const Tegra::Texture::TextureHandle tex_handle =
engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset());
return engine.GetTextureInfo(tex_handle);
}
const auto& gpu_profile = engine.AccessGuestDriverProfile();
const u32 offset =
entry.GetOffset() + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
return engine.GetStageTexture(shader_type, entry.GetOffset());
return engine.GetStageTexture(shader_type, offset);
} else {
return engine.GetTexture(entry.GetOffset());
return engine.GetTexture(offset);
}
}
@@ -244,9 +248,6 @@ void RasterizerOpenGL::SetupVertexInstances(GLuint vao) {
}
GLintptr RasterizerOpenGL::SetupIndexBuffer() {
if (accelerate_draw != AccelDraw::Indexed) {
return 0;
}
MICROPROFILE_SCOPE(OpenGL_Index);
const auto& regs = system.GPU().Maxwell3D().regs;
const std::size_t size = CalculateIndexBufferSize();
@@ -542,7 +543,8 @@ void RasterizerOpenGL::Clear() {
}
}
void RasterizerOpenGL::DrawPrelude() {
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
auto& gpu = system.GPU().Maxwell3D();
SyncRasterizeEnable(state);
@@ -563,9 +565,6 @@ void RasterizerOpenGL::DrawPrelude() {
buffer_cache.Acquire();
// Draw the vertex batch
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
std::size_t buffer_size = CalculateVertexArraysSize();
// Add space for index buffer
@@ -592,7 +591,11 @@ void RasterizerOpenGL::DrawPrelude() {
// Upload vertex and index data.
SetupVertexBuffer(vao);
SetupVertexInstances(vao);
index_buffer_offset = SetupIndexBuffer();
GLintptr index_buffer_offset;
if (is_indexed) {
index_buffer_offset = SetupIndexBuffer();
}
// Prepare packed bindings.
bind_ubo_pushbuffer.Setup();
@@ -626,6 +629,7 @@ void RasterizerOpenGL::DrawPrelude() {
// As all cached buffers are invalidated, we need to recheck their state.
gpu.dirty.ResetVertexArrays();
}
gpu.dirty.memory_general = false;
shader_program_manager->ApplyTo(state);
state.Apply();
@@ -633,106 +637,33 @@ void RasterizerOpenGL::DrawPrelude() {
if (texture_cache.TextureBarrier()) {
glTextureBarrier();
}
const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
const GLsizei num_instances =
static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1);
if (is_indexed) {
const GLenum index_format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format);
const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base);
const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count);
glDrawElementsInstancedBaseVertexBaseInstance(
primitive_mode, num_vertices, index_format,
reinterpret_cast<const void*>(index_buffer_offset), num_instances, base_vertex,
base_instance);
} else {
const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first);
const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count);
glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices, num_instances,
base_instance);
}
}
struct DrawParams {
bool is_indexed{};
bool is_instanced{};
GLenum primitive_mode{};
GLint count{};
GLint base_vertex{};
// Indexed settings
GLenum index_format{};
GLintptr index_buffer_offset{};
// Instanced setting
GLint num_instances{};
GLint base_instance{};
void DispatchDraw() {
if (is_indexed) {
const auto index_buffer_ptr = reinterpret_cast<const void*>(index_buffer_offset);
if (is_instanced) {
glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format,
index_buffer_ptr, num_instances,
base_vertex, base_instance);
} else {
glDrawElementsBaseVertex(primitive_mode, count, index_format, index_buffer_ptr,
base_vertex);
}
} else {
if (is_instanced) {
glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, count, num_instances,
base_instance);
} else {
glDrawArrays(primitive_mode, base_vertex, count);
}
}
}
};
bool RasterizerOpenGL::DrawBatch(bool is_indexed) {
accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
MICROPROFILE_SCOPE(OpenGL_Drawing);
DrawPrelude();
auto& maxwell3d = system.GPU().Maxwell3D();
const auto& regs = maxwell3d.regs;
const auto current_instance = maxwell3d.state.current_instance;
DrawParams draw_call{};
draw_call.is_indexed = is_indexed;
draw_call.num_instances = static_cast<GLint>(1);
draw_call.base_instance = static_cast<GLint>(current_instance);
draw_call.is_instanced = current_instance > 0;
draw_call.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
if (draw_call.is_indexed) {
draw_call.count = static_cast<GLint>(regs.index_array.count);
draw_call.base_vertex = static_cast<GLint>(regs.vb_element_base);
draw_call.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
draw_call.index_buffer_offset = index_buffer_offset;
} else {
draw_call.count = static_cast<GLint>(regs.vertex_buffer.count);
draw_call.base_vertex = static_cast<GLint>(regs.vertex_buffer.first);
}
draw_call.DispatchDraw();
maxwell3d.dirty.memory_general = false;
accelerate_draw = AccelDraw::Disabled;
Draw(is_indexed, false);
return true;
}
bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) {
accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
MICROPROFILE_SCOPE(OpenGL_Drawing);
DrawPrelude();
auto& maxwell3d = system.GPU().Maxwell3D();
const auto& regs = maxwell3d.regs;
const auto& draw_setup = maxwell3d.mme_draw;
DrawParams draw_call{};
draw_call.is_indexed = is_indexed;
draw_call.num_instances = static_cast<GLint>(draw_setup.instance_count);
draw_call.base_instance = static_cast<GLint>(regs.vb_base_instance);
draw_call.is_instanced = draw_setup.instance_count > 1;
draw_call.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
if (draw_call.is_indexed) {
draw_call.count = static_cast<GLint>(regs.index_array.count);
draw_call.base_vertex = static_cast<GLint>(regs.vb_element_base);
draw_call.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
draw_call.index_buffer_offset = index_buffer_offset;
} else {
draw_call.count = static_cast<GLint>(regs.vertex_buffer.count);
draw_call.base_vertex = static_cast<GLint>(regs.vertex_buffer.first);
}
draw_call.DispatchDraw();
maxwell3d.dirty.memory_general = false;
accelerate_draw = AccelDraw::Disabled;
Draw(is_indexed, true);
return true;
}
@@ -942,8 +873,15 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
u32 binding = device.GetBaseBindings(stage_index).sampler;
for (const auto& entry : shader->GetShaderEntries().samplers) {
const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
const auto texture = GetTextureInfo(maxwell3d, entry, shader_type);
SetupTexture(binding++, texture, entry);
if (!entry.IsIndexed()) {
const auto texture = GetTextureInfo(maxwell3d, entry, shader_type);
SetupTexture(binding++, texture, entry);
} else {
for (std::size_t i = 0; i < entry.Size(); ++i) {
const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
SetupTexture(binding++, texture, entry);
}
}
}
}
@@ -952,8 +890,17 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0;
for (const auto& entry : kernel->GetShaderEntries().samplers) {
const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute);
SetupTexture(binding++, texture, entry);
if (!entry.IsIndexed()) {
const auto texture =
GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute);
SetupTexture(binding++, texture, entry);
} else {
for (std::size_t i = 0; i < entry.Size(); ++i) {
const auto texture =
GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i);
SetupTexture(binding++, texture, entry);
}
}
}
}
@@ -1273,6 +1220,7 @@ void RasterizerOpenGL::SyncPointState() {
// Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
// in OpenGL).
state.point.program_control = regs.vp_point_size.enable != 0;
state.point.sprite = regs.point_sprite_enable != 0;
state.point.size = std::max(1.0f, regs.point_size);
}

View File

@@ -103,7 +103,7 @@ private:
std::size_t size);
/// Syncs all the state, shaders, render targets and textures setting before a draw call.
void DrawPrelude();
void Draw(bool is_indexed, bool is_instanced);
/// Configures the current textures to use for the draw command.
void SetupDrawTextures(std::size_t stage_index, const Shader& shader);
@@ -220,12 +220,7 @@ private:
GLintptr SetupIndexBuffer();
GLintptr index_buffer_offset;
void SetupShaders(GLenum primitive_mode);
enum class AccelDraw { Disabled, Arrays, Indexed };
AccelDraw accelerate_draw = AccelDraw::Disabled;
};
} // namespace OpenGL

View File

@@ -214,6 +214,7 @@ std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType s
}
void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
locker.SetBoundBuffer(usage.bound_buffer);
for (const auto& key : usage.keys) {
const auto [buffer, offset] = key.first;
locker.InsertKey(buffer, offset, key.second);
@@ -418,7 +419,8 @@ bool CachedShader::EnsureValidLockerVariant() {
ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,
const ConstBufferLocker& locker) const {
return ShaderDiskCacheUsage{unique_identifier, variant, locker.GetKeys(),
return ShaderDiskCacheUsage{unique_identifier, variant,
locker.GetBoundBuffer(), locker.GetKeys(),
locker.GetBoundSamplers(), locker.GetBindlessSamplers()};
}

View File

@@ -391,6 +391,7 @@ public:
DeclareVertex();
DeclareGeometry();
DeclareRegisters();
DeclareCustomVariables();
DeclarePredicates();
DeclareLocalMemory();
DeclareInternalFlags();
@@ -503,6 +504,16 @@ private:
}
}
void DeclareCustomVariables() {
const u32 num_custom_variables = ir.GetNumCustomVariables();
for (u32 i = 0; i < num_custom_variables; ++i) {
code.AddLine("float {} = 0.0f;", GetCustomVariable(i));
}
if (num_custom_variables > 0) {
code.AddNewLine();
}
}
void DeclarePredicates() {
const auto& predicates = ir.GetPredicates();
for (const auto pred : predicates) {
@@ -655,7 +666,8 @@ private:
u32 binding = device.GetBaseBindings(stage).sampler;
for (const auto& sampler : ir.GetSamplers()) {
const std::string name = GetSampler(sampler);
const std::string description = fmt::format("layout (binding = {}) uniform", binding++);
const std::string description = fmt::format("layout (binding = {}) uniform", binding);
binding += sampler.IsIndexed() ? sampler.Size() : 1;
std::string sampler_type = [&]() {
if (sampler.IsBuffer()) {
@@ -682,7 +694,11 @@ private:
sampler_type += "Shadow";
}
code.AddLine("{} {} {};", description, sampler_type, name);
if (!sampler.IsIndexed()) {
code.AddLine("{} {} {};", description, sampler_type, name);
} else {
code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.Size());
}
}
if (!ir.GetSamplers().empty()) {
code.AddNewLine();
@@ -775,6 +791,11 @@ private:
return {GetRegister(index), Type::Float};
}
if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
const u32 index = cv->GetIndex();
return {GetCustomVariable(index), Type::Float};
}
if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
const u32 value = immediate->GetValue();
if (value < 10) {
@@ -1019,7 +1040,6 @@ private:
}
return {{"gl_ViewportIndex", Type::Int}};
case 3:
UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader");
return {{"gl_PointSize", Type::Float}};
}
return {};
@@ -1099,7 +1119,11 @@ private:
} else if (!meta->ptp.empty()) {
expr += "Offsets";
}
expr += '(' + GetSampler(meta->sampler) + ", ";
if (!meta->sampler.IsIndexed()) {
expr += '(' + GetSampler(meta->sampler) + ", ";
} else {
expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], ";
}
expr += coord_constructors.at(count + (has_array ? 1 : 0) +
(has_shadow && !separate_dc ? 1 : 0) - 1);
expr += '(';
@@ -1311,6 +1335,8 @@ private:
const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
Type::Uint};
} else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
target = {GetCustomVariable(cv->GetIndex()), Type::Float};
} else {
UNREACHABLE_MSG("Assign called without a proper target");
}
@@ -1858,10 +1884,7 @@ private:
template <const std::string_view& opname, Type type>
Expression Atomic(Operation operation) {
ASSERT(stage == ShaderType::Compute);
auto& smem = std::get<SmemNode>(*operation[0]);
return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
Visit(operation[1]).As(type)),
type};
}
@@ -2241,6 +2264,10 @@ private:
return GetDeclarationWithSuffix(index, "gpr");
}
std::string GetCustomVariable(u32 index) const {
return GetDeclarationWithSuffix(index, "custom_var");
}
std::string GetPredicate(Tegra::Shader::Pred pred) const {
return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred");
}

View File

@@ -53,7 +53,7 @@ struct BindlessSamplerKey {
Tegra::Engines::SamplerDescriptor sampler{};
};
constexpr u32 NativeVersion = 11;
constexpr u32 NativeVersion = 12;
// Making sure sizes don't change by accident
static_assert(sizeof(ProgramVariant) == 20);
@@ -186,7 +186,8 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
u32 num_bound_samplers{};
u32 num_bindless_samplers{};
if (file.ReadArray(&usage.unique_identifier, 1) != 1 ||
file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
file.ReadArray(&usage.variant, 1) != 1 ||
file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
file.ReadArray(&num_bound_samplers, 1) != 1 ||
file.ReadArray(&num_bindless_samplers, 1) != 1) {
LOG_ERROR(Render_OpenGL, error_loading);
@@ -281,7 +282,9 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
u32 num_bindless_samplers{};
ShaderDiskCacheUsage usage;
if (!LoadObjectFromPrecompiled(usage.unique_identifier) ||
!LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) ||
!LoadObjectFromPrecompiled(usage.variant) ||
!LoadObjectFromPrecompiled(usage.bound_buffer) ||
!LoadObjectFromPrecompiled(num_keys) ||
!LoadObjectFromPrecompiled(num_bound_samplers) ||
!LoadObjectFromPrecompiled(num_bindless_samplers)) {
return {};
@@ -393,6 +396,7 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
if (file.WriteObject(TransferableEntryKind::Usage) != 1 ||
file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 ||
file.WriteObject(usage.bound_buffer) != 1 ||
file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 ||
file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 ||
file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) {
@@ -447,7 +451,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
};
if (!SaveObjectToPrecompiled(usage.unique_identifier) ||
!SaveObjectToPrecompiled(usage.variant) ||
!SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) ||
!SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) ||
!SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) ||
!SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) {

View File

@@ -79,6 +79,7 @@ static_assert(std::is_trivially_copyable_v<ProgramVariant>);
struct ShaderDiskCacheUsage {
u64 unique_identifier{};
ProgramVariant variant;
u32 bound_buffer{};
VideoCommon::Shader::KeyMap keys;
VideoCommon::Shader::BoundSamplerMap bound_samplers;
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;

View File

@@ -128,6 +128,7 @@ void OpenGLState::ApplyClipDistances() {
void OpenGLState::ApplyPointSize() {
Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control);
Enable(GL_POINT_SPRITE, cur_state.point.sprite, point.sprite);
if (UpdateValue(cur_state.point.size, point.size)) {
glPointSize(point.size);
}

View File

@@ -132,6 +132,7 @@ public:
struct {
bool program_control = false; // GL_PROGRAM_POINT_SIZE
bool sprite = false; // GL_POINT_SPRITE
GLfloat size = 1.0f; // GL_POINT_SIZE
} point;

View File

@@ -176,6 +176,19 @@ GLint GetSwizzleSource(SwizzleSource source) {
return GL_NONE;
}
GLenum GetComponent(PixelFormat format, bool is_first) {
switch (format) {
case PixelFormat::Z24S8:
case PixelFormat::Z32FS8:
return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
case PixelFormat::S8Z24:
return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
default:
UNREACHABLE();
return GL_DEPTH_COMPONENT;
}
}
void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) {
if (params.IsBuffer()) {
return;
@@ -184,7 +197,7 @@ void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) {
glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.num_levels - 1);
glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, static_cast<GLint>(params.num_levels - 1));
if (params.num_levels == 1) {
glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f);
}
@@ -416,11 +429,21 @@ void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_sou
if (new_swizzle == swizzle)
return;
swizzle = new_swizzle;
const std::array<GLint, 4> gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source),
GetSwizzleSource(z_source),
GetSwizzleSource(w_source)};
const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source),
GetSwizzleSource(z_source), GetSwizzleSource(w_source)};
const GLuint handle = GetTexture();
glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
const PixelFormat format = surface.GetSurfaceParams().pixel_format;
switch (format) {
case PixelFormat::Z24S8:
case PixelFormat::Z32FS8:
case PixelFormat::S8Z24:
glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
GetComponent(format, x_source == SwizzleSource::R));
break;
default:
glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
break;
}
}
OGLTextureView CachedSurfaceView::CreateTextureView() const {
@@ -529,8 +552,11 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect;
const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
glBlitFramebuffer(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.top),
static_cast<GLint>(src_rect.right), static_cast<GLint>(src_rect.bottom),
static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.top),
static_cast<GLint>(dst_rect.right), static_cast<GLint>(dst_rect.bottom),
buffers,
is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST);
}

View File

@@ -47,8 +47,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_UNSIGNED_INT_2_10_10_10_REV;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
UNREACHABLE();
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
return {};
}
case Maxwell::VertexAttribute::Type::SignedInt:
@@ -72,8 +71,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_INT_2_10_10_10_REV;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
UNREACHABLE();
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
return {};
}
case Maxwell::VertexAttribute::Type::Float:
@@ -89,13 +87,19 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return GL_FLOAT;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
UNREACHABLE();
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
return {};
}
case Maxwell::VertexAttribute::Type::UnsignedScaled:
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8_8:
return GL_UNSIGNED_BYTE;
default:
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
return {};
}
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
UNREACHABLE();
LOG_ERROR(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
return {};
}
}

View File

@@ -0,0 +1,265 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <optional>
#include <vector>
#include <fmt/format.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/telemetry.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/frontend/emu_window.h"
#include "core/memory.h"
#include "core/perf_stats.h"
#include "core/settings.h"
#include "core/telemetry_session.h"
#include "video_core/gpu.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
namespace Vulkan {
namespace {
VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_,
VkDebugUtilsMessageTypeFlagsEXT type,
const VkDebugUtilsMessengerCallbackDataEXT* data,
[[maybe_unused]] void* user_data) {
const vk::DebugUtilsMessageSeverityFlagBitsEXT severity{severity_};
const char* message{data->pMessage};
if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eError) {
LOG_CRITICAL(Render_Vulkan, "{}", message);
} else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning) {
LOG_WARNING(Render_Vulkan, "{}", message);
} else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo) {
LOG_INFO(Render_Vulkan, "{}", message);
} else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose) {
LOG_DEBUG(Render_Vulkan, "{}", message);
}
return VK_FALSE;
}
std::string GetReadableVersion(u32 version) {
return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version),
VK_VERSION_PATCH(version));
}
std::string GetDriverVersion(const VKDevice& device) {
// Extracted from
// https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314
const u32 version = device.GetDriverVersion();
if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) {
const u32 major = (version >> 22) & 0x3ff;
const u32 minor = (version >> 14) & 0x0ff;
const u32 secondary = (version >> 6) & 0x0ff;
const u32 tertiary = version & 0x003f;
return fmt::format("{}.{}.{}.{}", major, minor, secondary, tertiary);
}
if (device.GetDriverID() == vk::DriverIdKHR::eIntelProprietaryWindows) {
const u32 major = version >> 14;
const u32 minor = version & 0x3fff;
return fmt::format("{}.{}", major, minor);
}
return GetReadableVersion(version);
}
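// For illustration only, assuming a hypothetical Nvidia-packed value of 0x6E07C000:
//   major     = (0x6E07C000 >> 22) & 0x3ff = 440
//   minor     = (0x6E07C000 >> 14) & 0x0ff = 31
//   secondary = (0x6E07C000 >> 6)  & 0x0ff = 0
//   tertiary  =  0x6E07C000        & 0x3f  = 0
// which GetDriverVersion formats as "440.31.0.0".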
std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_extensions) {
std::sort(std::begin(available_extensions), std::end(available_extensions));
static constexpr std::size_t AverageExtensionSize = 64;
std::string separated_extensions;
separated_extensions.reserve(available_extensions.size() * AverageExtensionSize);
const auto end = std::end(available_extensions);
for (auto extension = std::begin(available_extensions); extension != end; ++extension) {
if (const bool is_last = extension + 1 == end; is_last) {
separated_extensions += *extension;
} else {
separated_extensions += fmt::format("{},", *extension);
}
}
return separated_extensions;
}
} // Anonymous namespace
RendererVulkan::RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system)
: RendererBase(window), system{system} {}
RendererVulkan::~RendererVulkan() {
ShutDown();
}
void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
const auto& layout = render_window.GetFramebufferLayout();
if (framebuffer && layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
const bool use_accelerated =
rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
const bool is_srgb = use_accelerated && screen_info.is_srgb;
if (swapchain->HasFramebufferChanged(layout) || swapchain->GetSrgbState() != is_srgb) {
swapchain->Create(layout.width, layout.height, is_srgb);
blit_screen->Recreate();
}
scheduler->WaitWorker();
swapchain->AcquireNextImage();
const auto [fence, render_semaphore] = blit_screen->Draw(*framebuffer, use_accelerated);
scheduler->Flush(false, render_semaphore);
if (swapchain->Present(render_semaphore, fence)) {
blit_screen->Recreate();
}
render_window.SwapBuffers();
rasterizer->TickFrame();
}
render_window.PollEvents();
}
bool RendererVulkan::Init() {
PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{};
render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface);
const vk::DispatchLoaderDynamic dldi(instance, vkGetInstanceProcAddr);
std::optional<vk::DebugUtilsMessengerEXT> callback;
if (Settings::values.renderer_debug && dldi.vkCreateDebugUtilsMessengerEXT) {
callback = CreateDebugCallback(dldi);
if (!callback) {
return false;
}
}
if (!PickDevices(dldi)) {
if (callback) {
instance.destroy(*callback, nullptr, dldi);
}
return false;
}
debug_callback = UniqueDebugUtilsMessengerEXT(
*callback, vk::ObjectDestroy<vk::Instance, vk::DispatchLoaderDynamic>(
instance, nullptr, device->GetDispatchLoader()));
Report();
memory_manager = std::make_unique<VKMemoryManager>(*device);
resource_manager = std::make_unique<VKResourceManager>(*device);
const auto& framebuffer = render_window.GetFramebufferLayout();
swapchain = std::make_unique<VKSwapchain>(surface, *device);
swapchain->Create(framebuffer.width, framebuffer.height, false);
scheduler = std::make_unique<VKScheduler>(*device, *resource_manager);
rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device,
*resource_manager, *memory_manager, *scheduler);
blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device,
*resource_manager, *memory_manager, *swapchain,
*scheduler, screen_info);
return true;
}
void RendererVulkan::ShutDown() {
if (!device) {
return;
}
const auto dev = device->GetLogical();
const auto& dld = device->GetDispatchLoader();
if (dev && dld.vkDeviceWaitIdle) {
dev.waitIdle(dld);
}
rasterizer.reset();
blit_screen.reset();
scheduler.reset();
swapchain.reset();
memory_manager.reset();
resource_manager.reset();
device.reset();
}
std::optional<vk::DebugUtilsMessengerEXT> RendererVulkan::CreateDebugCallback(
const vk::DispatchLoaderDynamic& dldi) {
const vk::DebugUtilsMessengerCreateInfoEXT callback_ci(
{},
vk::DebugUtilsMessageSeverityFlagBitsEXT::eError |
vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning |
vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo |
vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose,
vk::DebugUtilsMessageTypeFlagBitsEXT::eGeneral |
vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation |
vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance,
&DebugCallback, nullptr);
vk::DebugUtilsMessengerEXT callback;
if (instance.createDebugUtilsMessengerEXT(&callback_ci, nullptr, &callback, dldi) !=
vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to create debug callback");
return {};
}
return callback;
}
bool RendererVulkan::PickDevices(const vk::DispatchLoaderDynamic& dldi) {
const auto devices = instance.enumeratePhysicalDevices(dldi);
// TODO(Rodrigo): Choose device from config file
const s32 device_index = Settings::values.vulkan_device;
if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
return false;
}
const vk::PhysicalDevice physical_device = devices[device_index];
if (!VKDevice::IsSuitable(dldi, physical_device, surface)) {
return false;
}
device = std::make_unique<VKDevice>(dldi, physical_device, surface);
return device->Create(dldi, instance);
}
void RendererVulkan::Report() const {
const std::string vendor_name{device->GetVendorName()};
const std::string model_name{device->GetModelName()};
const std::string driver_version = GetDriverVersion(*device);
const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version);
const std::string api_version = GetReadableVersion(device->GetApiVersion());
const std::string extensions = BuildCommaSeparatedExtensions(device->GetAvailableExtensions());
LOG_INFO(Render_Vulkan, "Driver: {}", driver_name);
LOG_INFO(Render_Vulkan, "Device: {}", model_name);
LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version);
auto& telemetry_session = system.TelemetrySession();
constexpr auto field = Telemetry::FieldType::UserSystem;
telemetry_session.AddField(field, "GPU_Vendor", vendor_name);
telemetry_session.AddField(field, "GPU_Model", model_name);
telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name);
telemetry_session.AddField(field, "GPU_Vulkan_Version", api_version);
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
}
} // namespace Vulkan

View File

@@ -400,8 +400,10 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
false);
Test(extension, nv_device_diagnostic_checkpoints,
VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true);
if (Settings::values.renderer_debug) {
Test(extension, nv_device_diagnostic_checkpoints,
VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME, true);
}
}
if (khr_shader_float16_int8) {

View File

@@ -571,7 +571,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true);
}
if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
texceptions.set(rt);
texceptions[rt] = true;
}
}
@@ -579,7 +579,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
zeta_attachment = texture_cache.GetDepthBufferSurface(true);
}
if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
texceptions.set(ZETA_TEXCEPTION_INDEX);
texceptions[ZETA_TEXCEPTION_INDEX] = true;
}
texture_cache.GuardRenderTargets(false);
@@ -1122,11 +1122,12 @@ RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions)
for (std::size_t rt = 0; rt < static_cast<std::size_t>(regs.rt_control.count); ++rt) {
const auto& rendertarget = regs.rt[rt];
if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE)
if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) {
continue;
}
renderpass_params.color_attachments.push_back(RenderPassParams::ColorAttachment{
static_cast<u32>(rt), PixelFormatFromRenderTargetFormat(rendertarget.format),
texceptions.test(rt)});
texceptions[rt]});
}
renderpass_params.has_zeta = regs.zeta_enable;

View File

@@ -353,6 +353,7 @@ private:
DeclareFragment();
DeclareCompute();
DeclareRegisters();
DeclareCustomVariables();
DeclarePredicates();
DeclareLocalMemory();
DeclareSharedMemory();
@@ -586,6 +587,15 @@ private:
}
}
void DeclareCustomVariables() {
const u32 num_custom_variables = ir.GetNumCustomVariables();
for (u32 i = 0; i < num_custom_variables; ++i) {
const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero);
Name(id, fmt::format("custom_var_{}", i));
custom_variables.emplace(i, AddGlobalVariable(id));
}
}
void DeclarePredicates() {
for (const auto pred : ir.GetPredicates()) {
const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
@@ -982,6 +992,11 @@ private:
return {OpLoad(t_float, registers.at(index)), Type::Float};
}
if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
const u32 index = cv->GetIndex();
return {OpLoad(t_float, custom_variables.at(index)), Type::Float};
}
if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
return {Constant(t_uint, immediate->GetValue()), Type::Uint};
}
@@ -1123,15 +1138,7 @@ private:
}
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor());
const Id real = AsUint(Visit(gmem->GetRealAddress()));
const Id base = AsUint(Visit(gmem->GetBaseAddress()));
Id offset = OpISub(t_uint, real, base);
offset = OpUDiv(t_uint, offset, Constant(t_uint, 4U));
return {OpLoad(t_float,
OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0U), offset)),
Type::Float};
return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint};
}
if (const auto lmem = std::get_if<LmemNode>(&*node)) {
@@ -1142,10 +1149,7 @@ private:
}
if (const auto smem = std::get_if<SmemNode>(&*node)) {
Id address = AsUint(Visit(smem->GetAddress()));
address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address);
return {OpLoad(t_uint, pointer), Type::Uint};
return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint};
}
if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
@@ -1339,20 +1343,13 @@ private:
target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float};
} else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
ASSERT(stage == ShaderType::Compute);
Id address = AsUint(Visit(smem->GetAddress()));
address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
target = {OpAccessChain(t_smem_uint, shared_memory, address), Type::Uint};
target = {GetSharedMemoryPointer(*smem), Type::Uint};
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
const Id real = AsUint(Visit(gmem->GetRealAddress()));
const Id base = AsUint(Visit(gmem->GetBaseAddress()));
const Id diff = OpISub(t_uint, real, base);
const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
target = {GetGlobalMemoryPointer(*gmem), Type::Uint};
const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor());
target = {OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0), offset),
Type::Float};
} else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
target = {custom_variables.at(cv->GetIndex()), Type::Float};
} else {
UNIMPLEMENTED();
@@ -1804,11 +1801,16 @@ private:
return {};
}
Expression UAtomicAdd(Operation operation) {
const auto& smem = std::get<SmemNode>(*operation[0]);
Id address = AsUint(Visit(smem.GetAddress()));
address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address);
Expression AtomicAdd(Operation operation) {
Id pointer;
if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
pointer = GetSharedMemoryPointer(*smem);
} else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
pointer = GetGlobalMemoryPointer(*gmem);
} else {
UNREACHABLE();
return {Constant(t_uint, 0), Type::Uint};
}
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
const Id semantics = Constant(t_uint, 0U);
@@ -2243,6 +2245,22 @@ private:
return {};
}
Id GetGlobalMemoryPointer(const GmemNode& gmem) {
const Id real = AsUint(Visit(gmem.GetRealAddress()));
const Id base = AsUint(Visit(gmem.GetBaseAddress()));
const Id diff = OpISub(t_uint, real, base);
const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
const Id buffer = global_buffers.at(gmem.GetDescriptor());
return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset);
}
Id GetSharedMemoryPointer(const SmemNode& smem) {
ASSERT(stage == ShaderType::Compute);
Id address = AsUint(Visit(smem.GetAddress()));
address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
return OpAccessChain(t_smem_uint, shared_memory, address);
}
static constexpr std::array operation_decompilers = {
&SPIRVDecompiler::Assign,
@@ -2389,7 +2407,7 @@ private:
&SPIRVDecompiler::AtomicImageXor,
&SPIRVDecompiler::AtomicImageExchange,
&SPIRVDecompiler::UAtomicAdd,
&SPIRVDecompiler::AtomicAdd,
&SPIRVDecompiler::Branch,
&SPIRVDecompiler::BranchIndirect,
@@ -2485,9 +2503,9 @@ private:
Id t_smem_uint{};
const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float);
const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint);
const Id t_gmem_array =
Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4U), "GmemArray");
Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray");
const Id t_gmem_struct = MemberDecorate(
Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
@@ -2508,6 +2526,7 @@ private:
Id out_vertex{};
Id in_vertex{};
std::map<u32, Id> registers;
std::map<u32, Id> custom_variables;
std::map<Tegra::Shader::Pred, Id> predicates;
std::map<u32, Id> flow_variables;
Id local_memory{};

View File

@@ -65,8 +65,8 @@ public:
void DetachSegment(ASTNode start, ASTNode end);
void Remove(ASTNode node);
ASTNode first{};
ASTNode last{};
ASTNode first;
ASTNode last;
};
class ASTProgram {
@@ -299,9 +299,9 @@ private:
friend class ASTZipper;
ASTData data;
ASTNode parent{};
ASTNode next{};
ASTNode previous{};
ASTNode parent;
ASTNode next;
ASTNode previous;
ASTZipper* manager{};
};

View File

@@ -66,6 +66,18 @@ std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindle
return value;
}
std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() {
if (bound_buffer_saved) {
return bound_buffer;
}
if (!engine) {
return std::nullopt;
}
bound_buffer_saved = true;
bound_buffer = engine->GetBoundBuffer();
return bound_buffer;
}
void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
keys.insert_or_assign({buffer, offset}, value);
}
@@ -78,6 +90,11 @@ void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDes
bindless_samplers.insert_or_assign({buffer, offset}, sampler);
}
void ConstBufferLocker::SetBoundBuffer(u32 buffer) {
bound_buffer_saved = true;
bound_buffer = buffer;
}
bool ConstBufferLocker::IsConsistent() const {
if (!engine) {
return false;

View File

@@ -10,6 +10,7 @@
#include "common/hash.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/shader_type.h"
#include "video_core/guest_driver.h"
namespace VideoCommon::Shader {
@@ -40,6 +41,8 @@ public:
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
std::optional<u32> ObtainBoundBuffer();
/// Inserts a key.
void InsertKey(u32 buffer, u32 offset, u32 value);
@@ -49,6 +52,9 @@ public:
/// Inserts a bindless sampler key.
void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
/// Sets the bound buffer for this locker.
void SetBoundBuffer(u32 buffer);
/// Checks keys and samplers against the engine's current const buffers. Returns true if they
/// are the same value, false otherwise.
bool IsConsistent() const;
@@ -71,12 +77,27 @@ public:
return bindless_samplers;
}
/// Gets the bound buffer used by this shader.
u32 GetBoundBuffer() const {
return bound_buffer;
}
/// Obtains access to the guest driver's profile.
VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const {
if (engine) {
return &engine->AccessGuestDriverProfile();
}
return nullptr;
}
private:
const Tegra::Engines::ShaderType stage;
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
KeyMap keys;
BoundSamplerMap bound_samplers;
BindlessSamplerMap bindless_samplers;
bool bound_buffer_saved{};
u32 bound_buffer{};
};
} // namespace VideoCommon::Shader

View File

@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <cstring>
#include <limits>
#include <set>
#include <fmt/format.h>
@@ -33,6 +34,52 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
return (absolute_offset % SchedPeriod) == 0;
}
void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
const std::list<Sampler>& used_samplers) {
if (gpu_driver == nullptr) {
LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet");
return;
}
if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) {
return;
}
u32 count{};
std::vector<u32> bound_offsets;
for (const auto& sampler : used_samplers) {
if (sampler.IsBindless()) {
continue;
}
++count;
bound_offsets.emplace_back(sampler.GetOffset());
}
if (count > 1) {
gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets));
}
}
std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
VideoCore::GuestDriverProfile* gpu_driver,
const std::list<Sampler>& used_samplers) {
if (gpu_driver == nullptr) {
LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet");
return std::nullopt;
}
const u32 base_offset = sampler_to_deduce.GetOffset();
u32 max_offset{std::numeric_limits<u32>::max()};
for (const auto& sampler : used_samplers) {
if (sampler.IsBindless()) {
continue;
}
if (sampler.GetOffset() > base_offset) {
max_offset = std::min(sampler.GetOffset(), max_offset);
}
}
if (max_offset == std::numeric_limits<u32>::max()) {
return std::nullopt;
}
return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize();
}
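// For illustration (hypothetical word offsets): if an indexed sampler is bound at word offset
// 0x48, the next bound sampler starts at word offset 0x50, and the deduced handler size is
// 8 bytes, TryDeduceSamplerSize yields ((0x50 - 0x48) * 4) / 8 = 4 array elements.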
} // Anonymous namespace
class ASTDecoder {
@@ -315,4 +362,25 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
return pc + 1;
}
void ShaderIR::PostDecode() {
// Deduce texture handler size if needed
auto gpu_driver = locker.AccessGuestDriverProfile();
DeduceTextureHandlerSize(gpu_driver, used_samplers);
// Deduce Indexed Samplers
if (!uses_indexed_samplers) {
return;
}
for (auto& sampler : used_samplers) {
if (!sampler.IsIndexed()) {
continue;
}
if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) {
sampler.SetSize(*size);
} else {
LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler");
sampler.SetSize(1);
}
}
}
} // namespace VideoCommon::Shader

View File

@@ -21,7 +21,7 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
Node op_a = GetRegister(instr.gpr8);
Node op_b = [&]() -> Node {
Node op_b = [&] {
if (instr.is_b_imm) {
return GetImmediate19(instr);
} else if (instr.is_b_gpr) {
@@ -141,6 +141,15 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::FCMP_R: {
UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
Node op_c = GetRegister(instr.gpr39);
Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
SetRegister(
bb, instr.gpr0,
Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b)));
break;
}
case OpCode::Id::RRO_C:
case OpCode::Id::RRO_R:
case OpCode::Id::RRO_IMM: {

View File

@@ -297,7 +297,7 @@ void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Nod
const Node one = Immediate(1);
const Node two = Immediate(2);
Node value{};
Node value;
for (u32 i = 0; i < lop_iterations; ++i) {
const Node shift_amount = Immediate(i);

View File

@@ -17,10 +17,13 @@ u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
const auto [base, packed_shift] = [&]() -> std::tuple<Node, Node> {
const auto [packed_shift, base] = [&]() -> std::pair<Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::BFI_RC:
return {GetRegister(instr.gpr39),
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
case OpCode::Id::BFI_IMM_R:
return {GetRegister(instr.gpr39), Immediate(instr.alu.GetSignedImm20_20())};
return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
default:
UNREACHABLE();
return {Immediate(0), Immediate(0)};

View File

@@ -19,9 +19,12 @@ namespace VideoCommon::Shader {
using Tegra::Shader::AtomicOp;
using Tegra::Shader::AtomicType;
using Tegra::Shader::Attribute;
using Tegra::Shader::GlobalAtomicOp;
using Tegra::Shader::GlobalAtomicType;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
using Tegra::Shader::StoreType;
namespace {
@@ -61,6 +64,27 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
}
}
Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset),
Immediate(size));
}
Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask));
offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value),
std::move(offset), Immediate(size));
}
Node Sign16Extend(Node value) {
Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15));
Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0));
return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend));
}
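// For illustration: a 16-bit LD_S/ST_S access uses mask 0b10, so an address ending in ...10
// selects the upper half of the 32-bit word. ExtractUnaligned computes the bit offset as
// (address & 0b10) << 3 = 16 and pulls 16 bits out of the loaded word, InsertUnaligned splices
// 16 bits back in at the same offset, and Sign16Extend ORs in 0xFFFF0000 when bit 15 is set.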
} // Anonymous namespace
u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
@@ -136,26 +160,31 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown));
[[fallthrough]];
case OpCode::Id::LD_S: {
const auto GetMemory = [&](s32 offset) {
const auto GetAddress = [&](s32 offset) {
ASSERT(offset % 4 == 0);
const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
immediate_offset);
return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(address)
: GetLocalMemory(address);
return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset);
};
const auto GetMemory = [&](s32 offset) {
return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset))
: GetLocalMemory(GetAddress(offset));
};
switch (instr.ldst_sl.type.Value()) {
- case Tegra::Shader::StoreType::Bits32:
- case Tegra::Shader::StoreType::Bits64:
- case Tegra::Shader::StoreType::Bits128: {
- const u32 count = [&]() {
+ case StoreType::Signed16:
+ SetRegister(bb, instr.gpr0,
+ Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16)));
+ break;
+ case StoreType::Bits32:
+ case StoreType::Bits64:
+ case StoreType::Bits128: {
+ const u32 count = [&] {
switch (instr.ldst_sl.type.Value()) {
- case Tegra::Shader::StoreType::Bits32:
+ case StoreType::Bits32:
return 1;
- case Tegra::Shader::StoreType::Bits64:
+ case StoreType::Bits64:
return 2;
- case Tegra::Shader::StoreType::Bits128:
+ case StoreType::Bits128:
return 4;
return 4;
default:
UNREACHABLE();
@@ -212,12 +241,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
// To handle unaligned loads get the bytes used to dereference global memory and extract
// those bytes from the loaded u32.
if (IsUnaligned(type)) {
- Node mask = Immediate(GetUnalignedMask(type));
- Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
- offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
- gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem),
- std::move(offset), Immediate(size));
+ gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size);
}
SetTemporary(bb, i, gmem);
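
The hunk above replaces the hand-rolled mask/shift/bitfield-extract sequence with the new ExtractUnaligned helper. As a rough, stand-alone sketch of the bit arithmetic those helpers describe as IR (plain C++ with illustrative names such as extract_unaligned and insert_unaligned, which are not part of the emulator's API): the low address bits selected by the mask give a byte offset, the shift-left-by-3 turns it into a bit offset, and the loaded 32-bit word is then bitfield-extracted or bitfield-inserted at that offset.

#include <cstdint>

// Illustrative only: scalar equivalent of the ExtractUnaligned / InsertUnaligned IR,
// operating on a 32-bit word loaded from the aligned address.
std::uint32_t extract_unaligned(std::uint32_t word, std::uint32_t address, std::uint32_t mask,
                                std::uint32_t size) {
    const std::uint32_t bit_offset = (address & mask) * 8;  // byte offset -> bit offset (<< 3)
    const std::uint32_t field = (size >= 32) ? ~0u : ((1u << size) - 1u);
    return (word >> bit_offset) & field;  // UBitfieldExtract(word, bit_offset, size)
}

std::uint32_t insert_unaligned(std::uint32_t word, std::uint32_t value, std::uint32_t address,
                               std::uint32_t mask, std::uint32_t size) {
    const std::uint32_t bit_offset = (address & mask) * 8;
    const std::uint32_t field = ((size >= 32) ? ~0u : ((1u << size) - 1u)) << bit_offset;
    return (word & ~field) | ((value << bit_offset) & field);  // UBitfieldInsert
}

With mask = 0b10 and size = 16 (the Signed16 paths in this diff), an address ending in 0b10 yields a bit offset of 16, so the upper half-word is selected; Sign16Extend then ORs in 0xFFFF0000 when bit 15 of that half-word is set.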
@@ -269,21 +293,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
};
- const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L
- ? &ShaderIR::SetLocalMemory
- : &ShaderIR::SetSharedMemory;
+ const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L;
+ const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory;
+ const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory;
switch (instr.ldst_sl.type.Value()) {
- case Tegra::Shader::StoreType::Bits128:
+ case StoreType::Bits128:
(this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3));
(this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2));
[[fallthrough]];
- case Tegra::Shader::StoreType::Bits64:
+ case StoreType::Bits64:
(this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1));
[[fallthrough]];
- case Tegra::Shader::StoreType::Bits32:
+ case StoreType::Bits32:
(this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
break;
+ case StoreType::Signed16: {
+ Node address = GetAddress(0);
+ Node memory = (this->*get_memory)(address);
+ (this->*set_memory)(
+ bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16));
+ break;
+ }
default:
UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),
static_cast<u32>(instr.ldst_sl.type.Value()));
@@ -323,18 +354,32 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
Node value = GetRegister(instr.gpr0.Value() + i);
if (IsUnaligned(type)) {
- Node mask = Immediate(GetUnalignedMask(type));
- Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
- offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
- value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset,
- Immediate(size));
+ const u32 mask = GetUnalignedMask(type);
+ value = InsertUnaligned(gmem, std::move(value), real_address, mask, size);
}
bb.push_back(Operation(OperationCode::Assign, gmem, value));
}
break;
}
+ case OpCode::Id::ATOM: {
+ UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}",
+ static_cast<int>(instr.atom.operation.Value()));
+ UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}",
+ static_cast<int>(instr.atom.type.Value()));
+ const auto [real_address, base_address, descriptor] =
+ TrackGlobalMemory(bb, instr, true, true);
+ if (!real_address || !base_address) {
+ // Tracking failed, skip atomic.
+ break;
+ }
+ Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
+ Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20));
+ SetRegister(bb, instr.gpr0, std::move(value));
+ break;
+ }
case OpCode::Id::ATOMS: {
UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
static_cast<int>(instr.atoms.operation.Value()));
@@ -348,7 +393,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
Node memory = GetSharedMemory(std::move(address));
Node data = GetRegister(instr.gpr20);
- Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data));
+ Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data));
SetRegister(bb, instr.gpr0, std::move(value));
break;
}
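
The last two hunks route both the new global ATOM.ADD case and the existing shared-memory ATOMS path through a single AtomicAdd operation whose result is written back to gpr0. The usual semantics of such an instruction is fetch-and-add: the destination register receives the value that was in memory before the addition. A minimal host-side analogy under that assumption (illustrative C++ with a made-up name, not what the GPU backends actually emit):

#include <atomic>
#include <cstdint>

// Illustrative only: models the assumed fetch-and-add behaviour of the
// AtomicAdd operation used for ATOM.ADD / ATOMS.ADD above.
std::int32_t atomic_add_s32(std::atomic<std::int32_t>& memory, std::int32_t data) {
    return memory.fetch_add(data);  // adds 'data', returns the previous contents
}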

Some files were not shown because too many files have changed in this diff.