Compare commits

...

87 Commits

Author SHA1 Message Date
yuzubot
6cff1a93ea "Merge Tagged PR 1340" 2021-01-03 13:01:57 +00:00
bunnei
71e18dddbe Merge pull request #5278 from MerryMage/cpuopt_unsafe_inaccurate_nan
dynarmic: Add Unsafe_InaccurateNaN optimization
2021-01-03 03:27:29 -08:00
bunnei
f64456c7e2 Merge pull request #5279 from bunnei/buffer-queue-connect
hle: service: nvflinger: buffer_queue: Do not reset id/layer_id on Connect.
2021-01-03 01:01:38 -08:00
Morph
ec58aabb26 Merge pull request #5281 from FearlessTobi/port-5668
Port citra-emu/citra#5668: "Update zstd to v1.4.8"
2021-01-03 12:25:21 +08:00
FearlessTobi
c90268127b Update zstd to v1.4.8
Co-Authored-By: Vitor K <29167336+vitor-k@users.noreply.github.com>
2021-01-03 01:58:14 +01:00
bunnei
235b5d27ae Merge pull request #5267 from lioncash/localize
main: Make the loader error dialog fully translatable
2021-01-02 15:44:32 -08:00
bunnei
beaa25d777 hle: service: nvflinger: buffer_queue: Do not reset id/layer_id on Connect.
- This behavior is a mistake, fixes Katana Zero.
2021-01-02 15:42:16 -08:00
MerryMage
8a5356357f externals: Update dynarmic to 3806284cb 2021-01-02 20:42:11 +00:00
bunnei
62f67df6d7 Merge pull request #5277 from Morph1984/fix-comments
general: Fix various spelling errors
2021-01-02 12:33:48 -08:00
bunnei
55fb8e7bdd Merge pull request #5273 from timleg002/patch-1
typo fix
2021-01-02 12:31:19 -08:00
MerryMage
57c9da1b39 dynarmic: Add Unsafe_InaccurateNaN optimization 2021-01-02 20:13:21 +00:00
Morph
a745d87971 general: Fix various spelling errors 2021-01-02 10:23:41 -05:00
bunnei
1ff341f3dc Merge pull request #5209 from Morph1984/refactor-controller-connect
configure_input: Modify controller connection delay
2021-01-01 13:10:34 -08:00
Timotej Leginus
0d47c1d527 typo fix
typo fix
2021-01-01 21:29:53 +01:00
LC
9e109849ff Merge pull request #5271 from MerryMage/rm-mem-Special
memory: Remove MemoryHook
2021-01-01 11:02:14 -05:00
Morph
904ac1daec configure_input: Modify controller connection delay
Increases the controller connection delay to 60ms and refactors it to attempt to disconnect all controllers prior to connecting all controllers in HID.
2021-01-01 06:39:24 -05:00
MerryMage
6d30745d77 memory: Remove MemoryHook 2021-01-01 11:34:38 +00:00
bunnei
eb318ffffc Merge pull request #5249 from ReinUsesLisp/lock-free-pages
core/memory: Read and write page table atomically
2021-01-01 02:54:01 -08:00
bunnei
0bddb794b0 Merge pull request #5239 from FearlessTobi/enable-translation
.ci/templates: Enable QT translation for MSVC CI
2020-12-31 23:31:23 -08:00
Lioncash
8c27a74132 main: Make the loader error dialog fully translatable
Makes the dialog fully localizable and also adds disambiguation comments
to help translators understand what the formatting specifiers indicate.
2020-12-31 12:44:31 -05:00
Lioncash
803ac4ca59 main: Tidy up enum comparison
enum classes are comparable with one another, so these casts aren't
necessary.
2020-12-31 10:21:15 -05:00
bunnei
60121d8f28 Merge pull request #5264 from 16-Bit-Dog/patch-1
Make the coding conventions more consistent
2020-12-31 01:46:53 -08:00
bunnei
fb41c82aaa Merge pull request #5265 from german77/port5509
Port citra-emu/citra#5509 "Look at direction of analog axis travel instead of instantaneous sample"
2020-12-30 22:24:30 -08:00
bunnei
25d607f5f6 Merge pull request #5208 from bunnei/service-threads
Service threads
2020-12-30 22:06:05 -08:00
german
aa4c7687ee Port citra-emu/citra#5509 2020-12-30 22:29:20 -06:00
16-Bit-Dog
fa5a1a4bfd Make the coding conventions more consistent
lut_index had + 0 added even where nothing needed to be added;

despite this, index was not given the same + 0 treatment, leaving the code inconsistent...
2020-12-30 19:03:26 -05:00
bunnei
53e49e5360 Merge pull request #5263 from lioncash/uninit
half_set: Resolve -Wmaybe-uninitialized warnings
2020-12-30 15:17:05 -08:00
Lioncash
bcafef4b94 half_set: Resolve -Wmaybe-uninitialized warnings 2020-12-30 17:59:42 -05:00
Rodrigo Locatti
dab7711524 Merge pull request #5260 from lioncash/uninit
maxwell_to_vk: Initialize usage variable in SurfaceFormat()
2020-12-30 16:17:01 -03:00
Lioncash
f0d9ab0717 maxwell_to_vk: Initialize usage variable in SurfaceFormat()
Silences a -Wmaybe-uninitialized warning
2020-12-30 13:25:03 -05:00
LC
da07977db0 Merge pull request #5251 from ReinUsesLisp/wuninitialized
cmake: Enforce -Wuninitialized
2020-12-30 06:34:42 -05:00
bunnei
d5fe722a30 Merge pull request #4967 from ReinUsesLisp/new-texcache
video_core/texture_cache: Rewrite the texture cache
2020-12-29 23:20:09 -08:00
ReinUsesLisp
9764c13d6d video_core: Rewrite the texture cache
The current texture cache has several points that hurt maintainability
and performance. It's easy to break unrelated parts of the cache
when doing minor changes. The cache can easily forget valuable
information about the cached textures by CPU writes or simply by its
normal usage.

This commit aims to address those issues.
2020-12-30 03:38:50 -03:00
ReinUsesLisp
ac2e2ebe97 cmake: Enforce -Wuninitialized 2020-12-30 02:58:58 -03:00
ReinUsesLisp
157fc2d785 service/pcie: Fix invalid initialization argument 2020-12-30 02:58:38 -03:00
ReinUsesLisp
9106ac1e6b video_core: Add a delayed destruction ring abstraction 2020-12-30 02:10:19 -03:00
ReinUsesLisp
21b18057f7 host_shaders: Add Vulkan assembler compute shaders 2020-12-30 02:03:50 -03:00
ReinUsesLisp
87ff58b1d7 host_shaders: Add helper to blit depth stencil fragment shader 2020-12-30 02:02:07 -03:00
ReinUsesLisp
ae5725b709 host_shaders: Add texture color blit fragment shader 2020-12-30 02:00:48 -03:00
ReinUsesLisp
64fbf319f1 host_shaders: Add shaders to present to the swapchain 2020-12-30 01:59:12 -03:00
ReinUsesLisp
82b7daed9c host_shaders: Add shaders to convert between depth and color images 2020-12-30 01:48:44 -03:00
ReinUsesLisp
dc81a90640 host_shaders: Add compute shader to copy BC4 as RG32UI to RGBA8 2020-12-30 01:47:08 -03:00
ReinUsesLisp
5169ce9fcd host_shaders: Add shader to render a full screen triangle 2020-12-30 01:44:09 -03:00
ReinUsesLisp
59c46f9de9 host_shaders: Add pitch linear upload compute shader 2020-12-30 01:41:42 -03:00
ReinUsesLisp
12d16248dd host_shaders: Add block linear upload compute shaders 2020-12-30 01:39:35 -03:00
ReinUsesLisp
f20e18f60d host_shaders: Add copyright headers to OpenGL present shaders 2020-12-30 01:35:56 -03:00
ReinUsesLisp
95d156a150 video_core/host_shaders: Add support for prebuilt SPIR-V shaders
Add support for building SPIR-V shaders from GLSL and generating headers
to include the text of those same GLSL shaders to consume from OpenGL.
2020-12-30 01:29:07 -03:00
ReinUsesLisp
b3587102d1 core/memory: Read and write page table atomically
Squash attributes into the pointer's integer, making them an uintptr_t
pair containing 2 bits at the bottom and then the pointer. These bits
are currently unused thanks to alignment requirements.

Configure Dynarmic to mask out these bits on pointer reads.

While we are at it, remove some unused attributes carried over from
Citra.

Read/Write and other hot functions use a two step unpacking process that
is less readable to stop MSVC from emitting an extra AND instruction in
the hot path:

 mov         rdi,rcx
 shr         rdx,0Ch
 mov         r8,qword ptr [rax+8]
 mov         rax,qword ptr [r8+rdx*8]
 mov         rdx,rax
-and         al,3
 and         rdx,0FFFFFFFFFFFFFFFCh
 je          Core::Memory::Memory::Impl::Read<unsigned char>
 mov         rax,qword ptr [vaddr]
 movzx       eax,byte ptr [rdx+rax]
2020-12-29 21:54:49 -03:00
bunnei
85cfd96f62 Merge pull request #5247 from comex/xx-concepts
k_priority_queue: Fix concepts use
2020-12-29 16:50:20 -08:00
bunnei
82e0eeed21 hle: kernel: service_thread: Make thread naming more consistent. 2020-12-29 16:46:29 -08:00
bunnei
a2a0f5318d hle: kernel: Manage service threads on another thread.
- This is to allow service threads to defer destruction of themselves.
2020-12-29 16:46:29 -08:00
bunnei
69e82d01d5 common: ThreadWorker: Add class to help do asynchronous work. 2020-12-29 16:46:29 -08:00
bunnei
b02464f685 Merge pull request #5246 from comex/xx-include
Add missing include of "core/hle/kernel/kernel.h"
2020-12-29 16:43:17 -08:00
bunnei
c192da3f82 hle: kernel: Manage host thread IDs using TLS.
- Avoids the need to have a large map of host to guest thread IDs.
2020-12-29 15:55:30 -08:00
LC
8d55c8c855 Merge pull request #5248 from ReinUsesLisp/update-dynarmic
externals: Update Dynarmic
2020-12-29 18:11:30 -05:00
ReinUsesLisp
3f048c8646 externals: Update Dynarmic
Keeps yuzu up to date with the latest changes and introduces a change
needed for a lock-free optimization on our side.
2020-12-29 19:30:52 -03:00
comex
388cf58b31 k_priority_queue: Fix concepts use
- For `std::same_as`, add missing include of `<concepts>`.

- For `std::convertible_to`, create a replacement in `common/concepts.h`
  and use that instead.

  This would also be found in `<concepts>`, but unlike `std::same_as`,
  `std::convertible_to` is not yet implemented in libc++, LLVM's STL
  implementation - not even in master.  (In fact, `std::same_as` is the
  *only* concept currently implemented.  For some reason.)
2020-12-29 14:33:41 -05:00
comex
b36896b90e Add missing include of "core/hle/kernel/kernel.h"
This is needed as the header invokes methods on KernelCore.
2020-12-29 14:22:35 -05:00
LC
aa87278bf0 Merge pull request #5245 from ameerj/sleepthread-log
svc: demote SleepThread log to LOG_TRACE
2020-12-29 14:03:24 -05:00
ameerj
0383363a8f svc: demote SleepThread log to LOG_TRACE
This log is called often, and introduces a lot of noise when debug logging is enabled, making it difficult to see other debug logs.
2020-12-29 14:01:56 -05:00
bunnei
22ba437aa4 Merge pull request #5236 from gal20/udp_client_patch
input_common: process udp packets only for the correct pad
2020-12-29 02:51:40 -08:00
bunnei
dfdac7d38a hle: kernel: Move ServiceThread ownership to KernelCore.
- Fixes a circular dependency which prevented threads from being released on shutdown.
2020-12-29 01:12:39 -08:00
bunnei
f57be2e626 hle: kernel: service_thread: Add thread name and take weak_ptr of ServerSession. 2020-12-29 01:06:39 -08:00
bunnei
7d77a3f88f hle: service: Acquire and release a lock on requests.
- This makes it such that we can safely access service members from CoreTiming thread.
2020-12-28 21:33:34 -08:00
bunnei
c7a06908ae audio_core: stream: Ensure buffer is valid before release. 2020-12-28 21:33:34 -08:00
bunnei
06f8c3dc01 core: Do not reset device_memory on shutdown.
- This will be reset on initialization.
2020-12-28 21:33:34 -08:00
bunnei
d0649d0971 core: hle: kernel: Clear process list on boot. 2020-12-28 21:33:34 -08:00
bunnei
954341763a gpu: gpu_thread: Ensure MicroProfile is shutdown on exit. 2020-12-28 21:33:34 -08:00
bunnei
994a9fec4e hle: service: vi: Refactor to grab buffer only once. 2020-12-28 21:33:34 -08:00
bunnei
6433b1dfd6 service: nvflinger: Improve synchronization for BufferQueue.
- Use proper mechanisms for blocking on DequeueBuffer.
- Ensure service thread terminates on emulation Shutdown.
2020-12-28 21:33:34 -08:00
bunnei
bea51d948d hle: service: Ensure system is powered on before writing IPC result. 2020-12-28 16:33:48 -08:00
bunnei
6d2f9428c5 core: kernel: Clear process list earlier. 2020-12-28 16:33:48 -08:00
bunnei
4991620f89 video_core: gpu_thread: Do not wait when system is powered down. 2020-12-28 16:33:48 -08:00
bunnei
916438a9de core: settings: Untangle multicore from asynchronous GPU.
- Now that GPU is always threaded, we can support multicore with synchronous GPU.
2020-12-28 16:33:48 -08:00
bunnei
40571c073f video_core: gpu: Implement synchronous mode using threaded GPU. 2020-12-28 16:33:48 -08:00
bunnei
14c825bd1c video_core: gpu: Refactor out synchronous/asynchronous GPU implementations.
- We must always use a GPU thread now, even with synchronous GPU.
2020-12-28 16:33:48 -08:00
bunnei
5d4715cc6a hle: kernel: hle_ipc: Remove SleepClientThread.
- This was kind of hacky, and no longer is necessary with service threads.
2020-12-28 16:33:48 -08:00
bunnei
87d6588cb5 hle: service: bsd: Update to work with service threads, removing SleepClientThread. 2020-12-28 16:33:48 -08:00
bunnei
0c81b83ca9 hle: service: nvdrv: Revert #4981 to remove usage of SleepClientThread.
- Note, this always processes the ioctl right away, which fixes BotW 1.0.0 issues.
2020-12-28 16:33:48 -08:00
bunnei
8bc3d66354 hle: kernel: service_thread: Add parameter for thread pool size. 2020-12-28 16:33:47 -08:00
bunnei
19a8f03ad5 hle: service: nvflinger: Refactor locking and interfaces. 2020-12-28 16:33:47 -08:00
bunnei
b377da042b hle: service: vi: Remove usage of SleepClientThread. 2020-12-28 16:33:47 -08:00
bunnei
28281ae250 core: hle: server_session: Use separate threads for each service connection. 2020-12-28 16:33:47 -08:00
bunnei
7dbdda908c Merge pull request #5233 from german77/inverted
InputCommon: Allow to invert analog axis with right click
2020-12-28 14:06:21 -08:00
FearlessTobi
368b3ee227 .ci/templates: Enable QT translation for MSVC CI
Previously this flag was missing, causing translation files not to be shipped with CI builds of yuzu.
2020-12-28 15:54:02 +01:00
gal20
1defd0847a udp client: process packets only for the correct pad 2020-12-27 22:22:48 +02:00
german
80fece4e08 Allow to invert analog axis with right click 2020-12-26 17:46:14 -06:00
280 changed files with 12401 additions and 9931 deletions

View File

@@ -8,7 +8,7 @@ steps:
displayName: 'Install vulkan-sdk'
- script: python -m pip install --upgrade pip conan
displayName: 'Install conan'
- script: refreshenv && mkdir build && cd build && cmake -G "Visual Studio 16 2019" -A x64 --config Release -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_QT_WEB_ENGINE=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON -DDISPLAY_VERSION=${{ parameters['version'] }} .. && cd ..
- script: refreshenv && mkdir build && cd build && cmake -G "Visual Studio 16 2019" -A x64 --config Release -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_QT_WEB_ENGINE=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON -DENABLE_QT_TRANSLATION=ON -DDISPLAY_VERSION=${{ parameters['version'] }} .. && cd ..
displayName: 'Configure CMake'
- task: MSBuild@1
displayName: 'Build'

View File

@@ -165,7 +165,7 @@ macro(yuzu_find_packages)
"lz4 1.8 lz4/1.9.2"
"nlohmann_json 3.8 nlohmann_json/3.8.0"
"ZLIB 1.2 zlib/1.2.11"
"zstd 1.4 zstd/1.4.5"
"zstd 1.4 zstd/1.4.8"
)
foreach(PACKAGE ${REQUIRED_LIBS})

View File

@@ -62,6 +62,7 @@ else()
-Werror=implicit-fallthrough
-Werror=missing-declarations
-Werror=reorder
-Werror=uninitialized
-Werror=unused-result
-Wextra
-Wmissing-declarations

View File

@@ -218,7 +218,7 @@ void Resample(s32* output, const s32* input, s32 pitch, s32& fraction, std::size
const auto l2 = lut[lut_index + 2];
const auto l3 = lut[lut_index + 3];
const auto s0 = static_cast<s32>(input[index]);
const auto s0 = static_cast<s32>(input[index + 0]);
const auto s1 = static_cast<s32>(input[index + 1]);
const auto s2 = static_cast<s32>(input[index + 2]);
const auto s3 = static_cast<s32>(input[index + 3]);

View File

@@ -11,7 +11,6 @@
#include "audio_core/info_updater.h"
#include "audio_core/voice_context.h"
#include "common/logging/log.h"
#include "core/hle/kernel/writable_event.h"
#include "core/memory.h"
#include "core/settings.h"
@@ -71,10 +70,9 @@ namespace {
namespace AudioCore {
AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory::Memory& memory_,
AudioCommon::AudioRendererParameter params,
std::shared_ptr<Kernel::WritableEvent> buffer_event_,
Stream::ReleaseCallback&& release_callback,
std::size_t instance_number)
: worker_params{params}, buffer_event{buffer_event_},
memory_pool_info(params.effect_count + params.voice_count * 4),
: worker_params{params}, memory_pool_info(params.effect_count + params.voice_count * 4),
voice_context(params.voice_count), effect_context(params.effect_count), mix_context(),
sink_context(params.sink_count), splitter_context(),
voices(params.voice_count), memory{memory_},
@@ -85,10 +83,9 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory
params.num_splitter_send_channels);
mix_context.Initialize(behavior_info, params.submix_count + 1, params.effect_count);
audio_out = std::make_unique<AudioCore::AudioOut>();
stream =
audio_out->OpenStream(core_timing, params.sample_rate, AudioCommon::STREAM_NUM_CHANNELS,
fmt::format("AudioRenderer-Instance{}", instance_number),
[=]() { buffer_event_->Signal(); });
stream = audio_out->OpenStream(
core_timing, params.sample_rate, AudioCommon::STREAM_NUM_CHANNELS,
fmt::format("AudioRenderer-Instance{}", instance_number), std::move(release_callback));
audio_out->StartStream(stream);
QueueMixedBuffer(0);

View File

@@ -27,10 +27,6 @@ namespace Core::Timing {
class CoreTiming;
}
namespace Kernel {
class WritableEvent;
}
namespace Core::Memory {
class Memory;
}
@@ -44,8 +40,7 @@ class AudioRenderer {
public:
AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory::Memory& memory_,
AudioCommon::AudioRendererParameter params,
std::shared_ptr<Kernel::WritableEvent> buffer_event_,
std::size_t instance_number);
Stream::ReleaseCallback&& release_callback, std::size_t instance_number);
~AudioRenderer();
[[nodiscard]] ResultCode UpdateAudioRenderer(const std::vector<u8>& input_params,
@@ -61,7 +56,6 @@ private:
BehaviorInfo behavior_info{};
AudioCommon::AudioRendererParameter worker_params;
std::shared_ptr<Kernel::WritableEvent> buffer_event;
std::vector<ServerMemoryPoolInfo> memory_pool_info;
VoiceContext voice_context;
EffectContext effect_context;

View File

@@ -130,7 +130,11 @@ bool Stream::ContainsBuffer([[maybe_unused]] Buffer::Tag tag) const {
std::vector<Buffer::Tag> Stream::GetTagsAndReleaseBuffers(std::size_t max_count) {
std::vector<Buffer::Tag> tags;
for (std::size_t count = 0; count < max_count && !released_buffers.empty(); ++count) {
tags.push_back(released_buffers.front()->GetTag());
if (released_buffers.front()) {
tags.push_back(released_buffers.front()->GetTag());
} else {
ASSERT_MSG(false, "Invalid tag in released_buffers!");
}
released_buffers.pop();
}
return tags;
@@ -140,7 +144,11 @@ std::vector<Buffer::Tag> Stream::GetTagsAndReleaseBuffers() {
std::vector<Buffer::Tag> tags;
tags.reserve(released_buffers.size());
while (!released_buffers.empty()) {
tags.push_back(released_buffers.front()->GetTag());
if (released_buffers.front()) {
tags.push_back(released_buffers.front()->GetTag());
} else {
ASSERT_MSG(false, "Invalid tag in released_buffers!");
}
released_buffers.pop();
}
return tags;

View File

@@ -135,8 +135,6 @@ add_library(common STATIC
math_util.h
memory_detect.cpp
memory_detect.h
memory_hook.cpp
memory_hook.h
microprofile.cpp
microprofile.h
microprofileui.h
@@ -162,6 +160,8 @@ add_library(common STATIC
thread.cpp
thread.h
thread_queue_list.h
thread_worker.cpp
thread_worker.h
threadsafe_queue.h
time_zone.cpp
time_zone.h

View File

@@ -29,22 +29,19 @@ assert_noinline_call(const Fn& fn) {
}
#define ASSERT(_a_) \
do \
if (!(_a_)) { \
assert_noinline_call([] { LOG_CRITICAL(Debug, "Assertion Failed!"); }); \
} \
while (0)
if (!(_a_)) { \
LOG_CRITICAL(Debug, "Assertion Failed!"); \
}
#define ASSERT_MSG(_a_, ...) \
do \
if (!(_a_)) { \
assert_noinline_call([&] { LOG_CRITICAL(Debug, "Assertion Failed!\n" __VA_ARGS__); }); \
} \
while (0)
if (!(_a_)) { \
LOG_CRITICAL(Debug, "Assertion Failed! " __VA_ARGS__); \
}
#define UNREACHABLE() assert_noinline_call([] { LOG_CRITICAL(Debug, "Unreachable code!"); })
#define UNREACHABLE() \
{ LOG_CRITICAL(Debug, "Unreachable code!"); }
#define UNREACHABLE_MSG(...) \
assert_noinline_call([&] { LOG_CRITICAL(Debug, "Unreachable code!\n" __VA_ARGS__); })
{ LOG_CRITICAL(Debug, "Unreachable code!\n" __VA_ARGS__); }
#ifdef _DEBUG
#define DEBUG_ASSERT(_a_) ASSERT(_a_)

View File

@@ -31,4 +31,8 @@ concept DerivedFrom = requires {
std::is_convertible_v<const volatile Derived*, const volatile Base*>;
};
// TODO: Replace with std::convertible_to when libc++ implements it.
template <typename From, typename To>
concept ConvertibleTo = std::is_convertible_v<From, To>;
} // namespace Common

View File

@@ -1,11 +0,0 @@
// Copyright 2018 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/memory_hook.h"
namespace Common {
MemoryHook::~MemoryHook() = default;
} // namespace Common

View File

@@ -1,47 +0,0 @@
// Copyright 2016 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <optional>
#include "common/common_types.h"
namespace Common {
/**
* Memory hooks have two purposes:
* 1. To allow reads and writes to a region of memory to be intercepted. This is used to implement
* texture forwarding and memory breakpoints for debugging.
* 2. To allow for the implementation of MMIO devices.
*
* A hook may be mapped to multiple regions of memory.
*
* If a std::nullopt or false is returned from a function, the read/write request is passed through
* to the underlying memory region.
*/
class MemoryHook {
public:
virtual ~MemoryHook();
virtual std::optional<bool> IsValidAddress(VAddr addr) = 0;
virtual std::optional<u8> Read8(VAddr addr) = 0;
virtual std::optional<u16> Read16(VAddr addr) = 0;
virtual std::optional<u32> Read32(VAddr addr) = 0;
virtual std::optional<u64> Read64(VAddr addr) = 0;
virtual bool ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size) = 0;
virtual bool Write8(VAddr addr, u8 data) = 0;
virtual bool Write16(VAddr addr, u16 data) = 0;
virtual bool Write32(VAddr addr, u32 data) = 0;
virtual bool Write64(VAddr addr, u64 data) = 0;
virtual bool WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size) = 0;
};
using MemoryHookPointer = std::shared_ptr<MemoryHook>;
} // namespace Common

View File

@@ -10,16 +10,10 @@ PageTable::PageTable() = default;
PageTable::~PageTable() noexcept = default;
void PageTable::Resize(std::size_t address_space_width_in_bits, std::size_t page_size_in_bits,
bool has_attribute) {
const std::size_t num_page_table_entries{1ULL
<< (address_space_width_in_bits - page_size_in_bits)};
void PageTable::Resize(size_t address_space_width_in_bits, size_t page_size_in_bits) {
const size_t num_page_table_entries{1ULL << (address_space_width_in_bits - page_size_in_bits)};
pointers.resize(num_page_table_entries);
backing_addr.resize(num_page_table_entries);
if (has_attribute) {
attributes.resize(num_page_table_entries);
}
}
} // namespace Common

View File

@@ -4,10 +4,10 @@
#pragma once
#include <atomic>
#include <tuple>
#include "common/common_types.h"
#include "common/memory_hook.h"
#include "common/virtual_buffer.h"
namespace Common {
@@ -20,27 +20,6 @@ enum class PageType : u8 {
/// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
/// invalidation
RasterizerCachedMemory,
/// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
Special,
/// Page is allocated for use.
Allocated,
};
struct SpecialRegion {
enum class Type {
DebugHook,
IODevice,
} type;
MemoryHookPointer handler;
[[nodiscard]] bool operator<(const SpecialRegion& other) const {
return std::tie(type, handler) < std::tie(other.type, other.handler);
}
[[nodiscard]] bool operator==(const SpecialRegion& other) const {
return std::tie(type, handler) == std::tie(other.type, other.handler);
}
};
/**
@@ -48,6 +27,59 @@ struct SpecialRegion {
* mimics the way a real CPU page table works.
*/
struct PageTable {
/// Number of bits reserved for attribute tagging.
/// This can be at most the guaranteed alignment of the pointers in the page table.
static constexpr int ATTRIBUTE_BITS = 2;
/**
* Pair of host pointer and page type attribute.
* This uses the lower bits of a given pointer to store the attribute tag.
* Writing and reading the pointer attribute pair is guaranteed to be atomic for the same method
* call. In other words, they are guaranteed to be synchronized at all times.
*/
class PageInfo {
public:
/// Returns the page pointer
[[nodiscard]] u8* Pointer() const noexcept {
return ExtractPointer(raw.load(std::memory_order_relaxed));
}
/// Returns the page type attribute
[[nodiscard]] PageType Type() const noexcept {
return ExtractType(raw.load(std::memory_order_relaxed));
}
/// Returns the page pointer and attribute pair, extracted from the same atomic read
[[nodiscard]] std::pair<u8*, PageType> PointerType() const noexcept {
const uintptr_t non_atomic_raw = raw.load(std::memory_order_relaxed);
return {ExtractPointer(non_atomic_raw), ExtractType(non_atomic_raw)};
}
/// Returns the raw representation of the page information.
/// Use ExtractPointer and ExtractType to unpack the value.
[[nodiscard]] uintptr_t Raw() const noexcept {
return raw.load(std::memory_order_relaxed);
}
/// Write a page pointer and type pair atomically
void Store(u8* pointer, PageType type) noexcept {
raw.store(reinterpret_cast<uintptr_t>(pointer) | static_cast<uintptr_t>(type));
}
/// Unpack a pointer from a page info raw representation
[[nodiscard]] static u8* ExtractPointer(uintptr_t raw) noexcept {
return reinterpret_cast<u8*>(raw & (~uintptr_t{0} << ATTRIBUTE_BITS));
}
/// Unpack a page type from a page info raw representation
[[nodiscard]] static PageType ExtractType(uintptr_t raw) noexcept {
return static_cast<PageType>(raw & ((uintptr_t{1} << ATTRIBUTE_BITS) - 1));
}
private:
std::atomic<uintptr_t> raw;
};
PageTable();
~PageTable() noexcept;
@@ -58,25 +90,21 @@ struct PageTable {
PageTable& operator=(PageTable&&) noexcept = default;
/**
* Resizes the page table to be able to accomodate enough pages within
* Resizes the page table to be able to accommodate enough pages within
* a given address space.
*
* @param address_space_width_in_bits The address size width in bits.
* @param page_size_in_bits The page size in bits.
* @param has_attribute Whether or not this page has any backing attributes.
*/
void Resize(std::size_t address_space_width_in_bits, std::size_t page_size_in_bits,
bool has_attribute);
void Resize(size_t address_space_width_in_bits, size_t page_size_in_bits);
/**
* Vector of memory pointers backing each page. An entry can only be non-null if the
* corresponding entry in the `attributes` vector is of type `Memory`.
* corresponding attribute element is of type `Memory`.
*/
VirtualBuffer<u8*> pointers;
VirtualBuffer<PageInfo> pointers;
VirtualBuffer<u64> backing_addr;
VirtualBuffer<PageType> attributes;
};
} // namespace Common

View File

@@ -394,7 +394,7 @@ public:
template <typename S, typename T2, typename F2>
friend S operator%(const S& p, const swapped_t v);
// Arithmetics + assignements
// Arithmetics + assignments
template <typename S, typename T2, typename F2>
friend S operator+=(const S& p, const swapped_t v);
@@ -451,7 +451,7 @@ S operator%(const S& i, const swap_struct_t<T, F> v) {
return i % v.swap();
}
// Arithmetics + assignements
// Arithmetics + assignments
template <typename S, typename T, typename F>
S& operator+=(S& i, const swap_struct_t<T, F> v) {
i += v.swap();

View File

@@ -0,0 +1,58 @@
// Copyright 2020 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/thread.h"
#include "common/thread_worker.h"
namespace Common {
ThreadWorker::ThreadWorker(std::size_t num_workers, const std::string& name) {
for (std::size_t i = 0; i < num_workers; ++i)
threads.emplace_back([this, thread_name{std::string{name}}] {
Common::SetCurrentThreadName(thread_name.c_str());
// Wait for first request
{
std::unique_lock lock{queue_mutex};
condition.wait(lock, [this] { return stop || !requests.empty(); });
}
while (true) {
std::function<void()> task;
{
std::unique_lock lock{queue_mutex};
condition.wait(lock, [this] { return stop || !requests.empty(); });
if (stop || requests.empty()) {
return;
}
task = std::move(requests.front());
requests.pop();
}
task();
}
});
}
ThreadWorker::~ThreadWorker() {
{
std::unique_lock lock{queue_mutex};
stop = true;
}
condition.notify_all();
for (std::thread& thread : threads) {
thread.join();
}
}
void ThreadWorker::QueueWork(std::function<void()>&& work) {
{
std::unique_lock lock{queue_mutex};
requests.emplace(work);
}
condition.notify_one();
}
} // namespace Common

View File

@@ -0,0 +1,30 @@
// Copyright 2020 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <atomic>
#include <functional>
#include <mutex>
#include <string>
#include <vector>
#include <queue>
namespace Common {
class ThreadWorker final {
public:
explicit ThreadWorker(std::size_t num_workers, const std::string& name);
~ThreadWorker();
void QueueWork(std::function<void()>&& work);
private:
std::vector<std::thread> threads;
std::queue<std::function<void()>> requests;
std::mutex queue_mutex;
std::condition_variable condition;
std::atomic_bool stop{};
};
} // namespace Common

View File

@@ -15,10 +15,12 @@ void FreeMemoryPages(void* base, std::size_t size) noexcept;
template <typename T>
class VirtualBuffer final {
public:
static_assert(
std::is_trivially_constructible_v<T>,
"T must be trivially constructible, as non-trivial constructors will not be executed "
"with the current allocator");
// TODO: Uncomment this and change Common::PageTable::PageInfo to be trivially constructible
// using std::atomic_ref once libc++ has support for it
// static_assert(
// std::is_trivially_constructible_v<T>,
// "T must be trivially constructible, as non-trivial constructors will not be executed "
// "with the current allocator");
constexpr VirtualBuffer() = default;
explicit VirtualBuffer(std::size_t count) : alloc_size{count * sizeof(T)} {

View File

@@ -202,6 +202,8 @@ add_library(core STATIC
hle/kernel/server_port.h
hle/kernel/server_session.cpp
hle/kernel/server_session.h
hle/kernel/service_thread.cpp
hle/kernel/service_thread.h
hle/kernel/session.cpp
hle/kernel/session.h
hle/kernel/shared_memory.cpp
@@ -500,7 +502,6 @@ add_library(core STATIC
hle/service/sm/controller.h
hle/service/sm/sm.cpp
hle/service/sm/sm.h
hle/service/sockets/blocking_worker.h
hle/service/sockets/bsd.cpp
hle/service/sockets/bsd.h
hle/service/sockets/ethc.cpp

View File

@@ -133,6 +133,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
config.page_table = reinterpret_cast<std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>*>(
page_table.pointers.data());
config.absolute_offset_page_table = true;
config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS;
config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
config.only_detect_misalignment_via_page_table_on_page_boundary = true;
@@ -180,6 +181,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
}
if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
}
}
return std::make_unique<Dynarmic::A32::Jit>(config);

View File

@@ -152,6 +152,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
// Memory
config.page_table = reinterpret_cast<void**>(page_table.pointers.data());
config.page_table_address_space_bits = address_space_bits;
config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS;
config.silently_mirror_page_table = false;
config.absolute_offset_page_table = true;
config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
@@ -211,6 +212,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
}
if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
}
}
return std::make_shared<Dynarmic::A64::Jit>(config);

View File

@@ -159,7 +159,7 @@ struct System::Impl {
device_memory = std::make_unique<Core::DeviceMemory>();
is_multicore = Settings::values.use_multi_core.GetValue();
is_async_gpu = is_multicore || Settings::values.use_asynchronous_gpu_emulation.GetValue();
is_async_gpu = Settings::values.use_asynchronous_gpu_emulation.GetValue();
kernel.SetMulticore(is_multicore);
cpu_manager.SetMulticore(is_multicore);
@@ -307,7 +307,6 @@ struct System::Impl {
service_manager.reset();
cheat_engine.reset();
telemetry_session.reset();
device_memory.reset();
// Close all CPU/threading state
cpu_manager.Shutdown();

View File

@@ -46,43 +46,6 @@ void SessionRequestHandler::ClientDisconnected(
boost::range::remove_erase(connected_sessions, server_session);
}
std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(
const std::string& reason, u64 timeout, WakeupCallback&& callback,
std::shared_ptr<WritableEvent> writable_event) {
// Put the client thread to sleep until the wait event is signaled or the timeout expires.
if (!writable_event) {
// Create event if not provided
const auto pair = WritableEvent::CreateEventPair(kernel, "HLE Pause Event: " + reason);
writable_event = pair.writable;
}
Handle event_handle = InvalidHandle;
{
KScopedSchedulerLockAndSleep lock(kernel, event_handle, thread.get(), timeout);
thread->SetHLECallback(
[context = *this, callback](std::shared_ptr<Thread> thread) mutable -> bool {
ThreadWakeupReason reason = thread->GetSignalingResult() == RESULT_TIMEOUT
? ThreadWakeupReason::Timeout
: ThreadWakeupReason::Signal;
callback(thread, context, reason);
context.WriteToOutgoingCommandBuffer(*thread);
return true;
});
const auto readable_event{writable_event->GetReadableEvent()};
writable_event->Clear();
thread->SetHLESyncObject(readable_event.get());
thread->SetStatus(ThreadStatus::WaitHLEEvent);
thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
readable_event->AddWaitingThread(thread);
}
thread->SetHLETimeEvent(event_handle);
is_thread_waiting = true;
return writable_event;
}
HLERequestContext::HLERequestContext(KernelCore& kernel, Core::Memory::Memory& memory,
std::shared_ptr<ServerSession> server_session,
std::shared_ptr<Thread> thread)

View File

@@ -129,23 +129,6 @@ public:
using WakeupCallback = std::function<void(
std::shared_ptr<Thread> thread, HLERequestContext& context, ThreadWakeupReason reason)>;
/**
* Puts the specified guest thread to sleep until the returned event is signaled or until the
* specified timeout expires.
* @param reason Reason for pausing the thread, to be used for debugging purposes.
* @param timeout Timeout in nanoseconds after which the thread will be awoken and the callback
* invoked with a Timeout reason.
* @param callback Callback to be invoked when the thread is resumed. This callback must write
* the entire command response once again, regardless of the state of it before this function
* was called.
* @param writable_event Event to use to wake up the thread. If unspecified, an event will be
* created.
* @returns Event that when signaled will resume the thread and call the callback function.
*/
std::shared_ptr<WritableEvent> SleepClientThread(
const std::string& reason, u64 timeout, WakeupCallback&& callback,
std::shared_ptr<WritableEvent> writable_event = nullptr);
/// Populates this context with data from the requesting process/thread.
ResultCode PopulateFromIncomingCommandBuffer(const HandleTable& handle_table,
u32_le* src_cmdbuf);

View File

@@ -8,11 +8,13 @@
#pragma once
#include <array>
#include <concepts>
#include "common/assert.h"
#include "common/bit_set.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/concepts.h"
namespace Kernel {
@@ -21,7 +23,7 @@ class Thread;
template <typename T>
concept KPriorityQueueAffinityMask = !std::is_reference_v<T> && requires(T & t) {
{ t.GetAffinityMask() }
->std::convertible_to<u64>;
->Common::ConvertibleTo<u64>;
{t.SetAffinityMask(std::declval<u64>())};
{ t.GetAffinity(std::declval<int32_t>()) }
@@ -48,9 +50,9 @@ concept KPriorityQueueMember = !std::is_reference_v<T> && requires(T & t) {
->KPriorityQueueAffinityMask;
{ t.GetActiveCore() }
->std::convertible_to<s32>;
->Common::ConvertibleTo<s32>;
{ t.GetPriority() }
->std::convertible_to<s32>;
->Common::ConvertibleTo<s32>;
};
template <typename Member, size_t _NumCores, int LowestPriority, int HighestPriority>

View File

@@ -10,6 +10,7 @@
#include "common/assert.h"
#include "common/spin_lock.h"
#include "core/hardware_properties.h"
#include "core/hle/kernel/kernel.h"
namespace Kernel {

View File

@@ -8,13 +8,14 @@
#include <functional>
#include <memory>
#include <thread>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/thread.h"
#include "common/thread_worker.h"
#include "core/arm/arm_interface.h"
#include "core/arm/cpu_interrupt_handler.h"
#include "core/arm/exclusive_monitor.h"
@@ -35,6 +36,7 @@
#include "core/hle/kernel/physical_core.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/hle/kernel/service_thread.h"
#include "core/hle/kernel/shared_memory.h"
#include "core/hle/kernel/synchronization.h"
#include "core/hle/kernel/thread.h"
@@ -60,6 +62,8 @@ struct KernelCore::Impl {
RegisterHostThread();
global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel);
service_thread_manager =
std::make_unique<Common::ThreadWorker>(1, "yuzu:ServiceThreadManager");
InitializePhysicalCores();
InitializeSystemResourceLimit(kernel);
@@ -76,6 +80,12 @@ struct KernelCore::Impl {
}
void Shutdown() {
process_list.clear();
// Ensures all service threads gracefully shutdown
service_thread_manager.reset();
service_threads.clear();
next_object_id = 0;
next_kernel_process_id = Process::InitialKIPIDMin;
next_user_process_id = Process::ProcessIDMin;
@@ -89,8 +99,6 @@ struct KernelCore::Impl {
cores.clear();
process_list.clear();
current_process = nullptr;
system_resource_limit = nullptr;
@@ -103,10 +111,8 @@ struct KernelCore::Impl {
exclusive_monitor.reset();
num_host_threads = 0;
std::fill(register_host_thread_keys.begin(), register_host_thread_keys.end(),
std::thread::id{});
std::fill(register_host_thread_values.begin(), register_host_thread_values.end(), 0);
// Next host thread ID to use, 0-3 IDs represent core threads, >3 represent others
next_host_thread_id = Core::Hardware::NUM_CPU_CORES;
}
void InitializePhysicalCores() {
@@ -186,52 +192,46 @@ struct KernelCore::Impl {
}
}
/// Creates a new host thread ID, should only be called by GetHostThreadId
u32 AllocateHostThreadId(std::optional<std::size_t> core_id) {
if (core_id) {
// The first four slots are reserved for CPU core threads
ASSERT(*core_id < Core::Hardware::NUM_CPU_CORES);
return static_cast<u32>(*core_id);
} else {
return next_host_thread_id++;
}
}
/// Gets the host thread ID for the caller, allocating a new one if this is the first time
u32 GetHostThreadId(std::optional<std::size_t> core_id = std::nullopt) {
const thread_local auto host_thread_id{AllocateHostThreadId(core_id)};
return host_thread_id;
}
/// Registers a CPU core thread by allocating a host thread ID for it
void RegisterCoreThread(std::size_t core_id) {
const std::thread::id this_id = std::this_thread::get_id();
ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
const auto this_id = GetHostThreadId(core_id);
if (!is_multicore) {
single_core_thread_id = this_id;
}
const auto end =
register_host_thread_keys.begin() + static_cast<ptrdiff_t>(num_host_threads);
const auto it = std::find(register_host_thread_keys.begin(), end, this_id);
ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
ASSERT(it == end);
InsertHostThread(static_cast<u32>(core_id));
}
/// Registers a new host thread by allocating a host thread ID for it
void RegisterHostThread() {
const std::thread::id this_id = std::this_thread::get_id();
const auto end =
register_host_thread_keys.begin() + static_cast<ptrdiff_t>(num_host_threads);
const auto it = std::find(register_host_thread_keys.begin(), end, this_id);
if (it == end) {
InsertHostThread(registered_thread_ids++);
}
[[maybe_unused]] const auto this_id = GetHostThreadId();
}
void InsertHostThread(u32 value) {
const size_t index = num_host_threads++;
ASSERT_MSG(index < NUM_REGISTRABLE_HOST_THREADS, "Too many host threads");
register_host_thread_values[index] = value;
register_host_thread_keys[index] = std::this_thread::get_id();
}
[[nodiscard]] u32 GetCurrentHostThreadID() const {
const std::thread::id this_id = std::this_thread::get_id();
[[nodiscard]] u32 GetCurrentHostThreadID() {
const auto this_id = GetHostThreadId();
if (!is_multicore && single_core_thread_id == this_id) {
return static_cast<u32>(system.GetCpuManager().CurrentCore());
}
const auto end =
register_host_thread_keys.begin() + static_cast<ptrdiff_t>(num_host_threads);
const auto it = std::find(register_host_thread_keys.begin(), end, this_id);
if (it == end) {
return Core::INVALID_HOST_THREAD_ID;
}
return register_host_thread_values[static_cast<size_t>(
std::distance(register_host_thread_keys.begin(), it))];
return this_id;
}
Core::EmuThreadHandle GetCurrentEmuThreadID() const {
[[nodiscard]] Core::EmuThreadHandle GetCurrentEmuThreadID() {
Core::EmuThreadHandle result = Core::EmuThreadHandle::InvalidHandle();
result.host_handle = GetCurrentHostThreadID();
if (result.host_handle >= Core::Hardware::NUM_CPU_CORES) {
@@ -325,15 +325,8 @@ struct KernelCore::Impl {
std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor;
std::vector<Kernel::PhysicalCore> cores;
// 0-3 IDs represent core threads, >3 represent others
std::atomic<u32> registered_thread_ids{Core::Hardware::NUM_CPU_CORES};
// Number of host threads is a relatively high number to avoid overflowing
static constexpr size_t NUM_REGISTRABLE_HOST_THREADS = 64;
std::atomic<size_t> num_host_threads{0};
std::array<std::atomic<std::thread::id>, NUM_REGISTRABLE_HOST_THREADS>
register_host_thread_keys{};
std::array<std::atomic<u32>, NUM_REGISTRABLE_HOST_THREADS> register_host_thread_values{};
// Next host thread ID to use, 0-3 IDs represent core threads, >3 represent others
std::atomic<u32> next_host_thread_id{Core::Hardware::NUM_CPU_CORES};
// Kernel memory management
std::unique_ptr<Memory::MemoryManager> memory_manager;
@@ -345,12 +338,19 @@ struct KernelCore::Impl {
std::shared_ptr<Kernel::SharedMemory> irs_shared_mem;
std::shared_ptr<Kernel::SharedMemory> time_shared_mem;
// Threads used for services
std::unordered_set<std::shared_ptr<Kernel::ServiceThread>> service_threads;
// Service threads are managed by a worker thread, so that a calling service thread can queue up
// the release of itself
std::unique_ptr<Common::ThreadWorker> service_thread_manager;
std::array<std::shared_ptr<Thread>, Core::Hardware::NUM_CPU_CORES> suspend_threads{};
std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES> interrupts{};
std::array<std::unique_ptr<Kernel::KScheduler>, Core::Hardware::NUM_CPU_CORES> schedulers{};
bool is_multicore{};
std::thread::id single_core_thread_id{};
u32 single_core_thread_id{};
std::array<u64, Core::Hardware::NUM_CPU_CORES> svc_ticks{};
@@ -639,4 +639,19 @@ void KernelCore::ExitSVCProfile() {
MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[core]);
}
std::weak_ptr<Kernel::ServiceThread> KernelCore::CreateServiceThread(const std::string& name) {
auto service_thread = std::make_shared<Kernel::ServiceThread>(*this, 1, name);
impl->service_thread_manager->QueueWork(
[this, service_thread] { impl->service_threads.emplace(service_thread); });
return service_thread;
}
void KernelCore::ReleaseServiceThread(std::weak_ptr<Kernel::ServiceThread> service_thread) {
impl->service_thread_manager->QueueWork([this, service_thread] {
if (auto strong_ptr = service_thread.lock()) {
impl->service_threads.erase(strong_ptr);
}
});
}
} // namespace Kernel

View File

@@ -42,6 +42,7 @@ class Process;
class ResourceLimit;
class KScheduler;
class SharedMemory;
class ServiceThread;
class Synchronization;
class Thread;
class TimeManager;
@@ -227,6 +228,22 @@ public:
void ExitSVCProfile();
/**
* Creates an HLE service thread, which is used to execute service routines asynchronously.
* While these are allocated per ServerSession, these need to be owned and managed outside of
* ServerSession to avoid a circular dependency.
* @param name String name for the ServerSession creating this thread, used for debug purposes.
* @returns A weak pointer to the newly created service thread.
*/
std::weak_ptr<Kernel::ServiceThread> CreateServiceThread(const std::string& name);
/**
* Releases a HLE service thread, instructing KernelCore to free it. This should be called when
* the ServerSession associated with the thread is destroyed.
* @param service_thread Service thread to release.
*/
void ReleaseServiceThread(std::weak_ptr<Kernel::ServiceThread> service_thread);
private:
friend class Object;
friend class Process;

View File

@@ -73,12 +73,12 @@ enum class MemoryState : u32 {
ThreadLocal =
static_cast<u32>(Svc::MemoryState::ThreadLocal) | FlagMapped | FlagReferenceCounted,
Transfered = static_cast<u32>(Svc::MemoryState::Transfered) | FlagsMisc |
FlagCanAlignedDeviceMap | FlagCanChangeAttribute | FlagCanUseIpc |
FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc,
Transferred = static_cast<u32>(Svc::MemoryState::Transferred) | FlagsMisc |
FlagCanAlignedDeviceMap | FlagCanChangeAttribute | FlagCanUseIpc |
FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc,
SharedTransfered = static_cast<u32>(Svc::MemoryState::SharedTransfered) | FlagsMisc |
FlagCanAlignedDeviceMap | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc,
SharedTransferred = static_cast<u32>(Svc::MemoryState::SharedTransferred) | FlagsMisc |
FlagCanAlignedDeviceMap | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc,
SharedCode = static_cast<u32>(Svc::MemoryState::SharedCode) | FlagMapped |
FlagReferenceCounted | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc,
@@ -111,8 +111,8 @@ static_assert(static_cast<u32>(MemoryState::AliasCodeData) == 0x03FFBD09);
static_assert(static_cast<u32>(MemoryState::Ipc) == 0x005C3C0A);
static_assert(static_cast<u32>(MemoryState::Stack) == 0x005C3C0B);
static_assert(static_cast<u32>(MemoryState::ThreadLocal) == 0x0040200C);
static_assert(static_cast<u32>(MemoryState::Transfered) == 0x015C3C0D);
static_assert(static_cast<u32>(MemoryState::SharedTransfered) == 0x005C380E);
static_assert(static_cast<u32>(MemoryState::Transferred) == 0x015C3C0D);
static_assert(static_cast<u32>(MemoryState::SharedTransferred) == 0x005C380E);
static_assert(static_cast<u32>(MemoryState::SharedCode) == 0x0040380F);
static_assert(static_cast<u32>(MemoryState::Inaccessible) == 0x00000010);
static_assert(static_cast<u32>(MemoryState::NonSecureIpc) == 0x005C3811);

View File

@@ -265,7 +265,7 @@ ResultCode PageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_t
physical_memory_usage = 0;
memory_pool = pool;
page_table_impl.Resize(address_space_width, PageBits, true);
page_table_impl.Resize(address_space_width, PageBits);
return InitializeMemoryLayout(start, end);
}
@@ -1007,8 +1007,8 @@ constexpr VAddr PageTable::GetRegionAddress(MemoryState state) const {
case MemoryState::Shared:
case MemoryState::AliasCode:
case MemoryState::AliasCodeData:
case MemoryState::Transfered:
case MemoryState::SharedTransfered:
case MemoryState::Transferred:
case MemoryState::SharedTransferred:
case MemoryState::SharedCode:
case MemoryState::GeneratedCode:
case MemoryState::CodeOut:
@@ -1042,8 +1042,8 @@ constexpr std::size_t PageTable::GetRegionSize(MemoryState state) const {
case MemoryState::Shared:
case MemoryState::AliasCode:
case MemoryState::AliasCodeData:
case MemoryState::Transfered:
case MemoryState::SharedTransfered:
case MemoryState::Transferred:
case MemoryState::SharedTransferred:
case MemoryState::SharedCode:
case MemoryState::GeneratedCode:
case MemoryState::CodeOut:
@@ -1080,8 +1080,8 @@ constexpr bool PageTable::CanContain(VAddr addr, std::size_t size, MemoryState s
case MemoryState::AliasCodeData:
case MemoryState::Stack:
case MemoryState::ThreadLocal:
case MemoryState::Transfered:
case MemoryState::SharedTransfered:
case MemoryState::Transferred:
case MemoryState::SharedTransferred:
case MemoryState::SharedCode:
case MemoryState::GeneratedCode:
case MemoryState::CodeOut:

View File

@@ -25,19 +25,19 @@
namespace Kernel {
ServerSession::ServerSession(KernelCore& kernel) : SynchronizationObject{kernel} {}
ServerSession::~ServerSession() = default;
ServerSession::~ServerSession() {
kernel.ReleaseServiceThread(service_thread);
}
ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kernel,
std::shared_ptr<Session> parent,
std::string name) {
std::shared_ptr<ServerSession> session{std::make_shared<ServerSession>(kernel)};
session->request_event =
Core::Timing::CreateEvent(name, [session](std::uintptr_t, std::chrono::nanoseconds) {
session->CompleteSyncRequest();
});
session->name = std::move(name);
session->parent = std::move(parent);
session->service_thread = kernel.CreateServiceThread(session->name);
return MakeResult(std::move(session));
}
@@ -142,16 +142,16 @@ ResultCode ServerSession::QueueSyncRequest(std::shared_ptr<Thread> thread,
std::make_shared<HLERequestContext>(kernel, memory, SharedFrom(this), std::move(thread));
context->PopulateFromIncomingCommandBuffer(kernel.CurrentProcess()->GetHandleTable(), cmd_buf);
request_queue.Push(std::move(context));
if (auto strong_ptr = service_thread.lock()) {
strong_ptr->QueueSyncRequest(*this, std::move(context));
return RESULT_SUCCESS;
}
return RESULT_SUCCESS;
}
ResultCode ServerSession::CompleteSyncRequest() {
ASSERT(!request_queue.Empty());
auto& context = *request_queue.Front();
ResultCode ServerSession::CompleteSyncRequest(HLERequestContext& context) {
ResultCode result = RESULT_SUCCESS;
// If the session has been converted to a domain, handle the domain request
if (IsDomain() && context.HasDomainMessageHeader()) {
@@ -177,18 +177,13 @@ ResultCode ServerSession::CompleteSyncRequest() {
}
}
request_queue.Pop();
return result;
}
ResultCode ServerSession::HandleSyncRequest(std::shared_ptr<Thread> thread,
Core::Memory::Memory& memory,
Core::Timing::CoreTiming& core_timing) {
const ResultCode result = QueueSyncRequest(std::move(thread), memory);
const auto delay = std::chrono::nanoseconds{kernel.IsMulticore() ? 0 : 20000};
core_timing.ScheduleEvent(delay, request_event, {});
return result;
return QueueSyncRequest(std::move(thread), memory);
}
} // namespace Kernel

View File

@@ -10,6 +10,7 @@
#include <vector>
#include "common/threadsafe_queue.h"
#include "core/hle/kernel/service_thread.h"
#include "core/hle/kernel/synchronization_object.h"
#include "core/hle/result.h"
@@ -43,6 +44,8 @@ class Thread;
* TLS buffer and control is transferred back to it.
*/
class ServerSession final : public SynchronizationObject {
friend class ServiceThread;
public:
explicit ServerSession(KernelCore& kernel);
~ServerSession() override;
@@ -132,7 +135,7 @@ private:
ResultCode QueueSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory);
/// Completes a sync request from the emulated application.
ResultCode CompleteSyncRequest();
ResultCode CompleteSyncRequest(HLERequestContext& context);
/// Handles a SyncRequest to a domain, forwarding the request to the proper object or closing an
/// object handle.
@@ -163,11 +166,8 @@ private:
/// The name of this session (optional)
std::string name;
/// Core timing event used to schedule the service request at some point in the future
std::shared_ptr<Core::Timing::EventType> request_event;
/// Queue of scheduled service requests
Common::MPSCQueue<std::shared_ptr<Kernel::HLERequestContext>> request_queue;
/// Thread to dispatch service requests
std::weak_ptr<ServiceThread> service_thread;
};
} // namespace Kernel

View File

@@ -0,0 +1,110 @@
// Copyright 2020 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <condition_variable>
#include <functional>
#include <mutex>
#include <thread>
#include <vector>
#include <queue>
#include "common/assert.h"
#include "common/scope_exit.h"
#include "common/thread.h"
#include "core/core.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/server_session.h"
#include "core/hle/kernel/service_thread.h"
#include "core/hle/lock.h"
#include "video_core/renderer_base.h"
namespace Kernel {
class ServiceThread::Impl final {
public:
explicit Impl(KernelCore& kernel, std::size_t num_threads, const std::string& name);
~Impl();
void QueueSyncRequest(ServerSession& session, std::shared_ptr<HLERequestContext>&& context);
private:
std::vector<std::thread> threads;
std::queue<std::function<void()>> requests;
std::mutex queue_mutex;
std::condition_variable condition;
const std::string service_name;
bool stop{};
};
ServiceThread::Impl::Impl(KernelCore& kernel, std::size_t num_threads, const std::string& name)
: service_name{name} {
for (std::size_t i = 0; i < num_threads; ++i)
threads.emplace_back([this, &kernel] {
Common::SetCurrentThreadName(std::string{"yuzu:HleService:" + service_name}.c_str());
// Wait for first request before trying to acquire a render context
{
std::unique_lock lock{queue_mutex};
condition.wait(lock, [this] { return stop || !requests.empty(); });
}
kernel.RegisterHostThread();
while (true) {
std::function<void()> task;
{
std::unique_lock lock{queue_mutex};
condition.wait(lock, [this] { return stop || !requests.empty(); });
if (stop || requests.empty()) {
return;
}
task = std::move(requests.front());
requests.pop();
}
task();
}
});
}
void ServiceThread::Impl::QueueSyncRequest(ServerSession& session,
std::shared_ptr<HLERequestContext>&& context) {
{
std::unique_lock lock{queue_mutex};
// ServerSession owns the service thread, so we cannot capture a strong pointer here in the
// event that the ServerSession is terminated.
std::weak_ptr<ServerSession> weak_ptr{SharedFrom(&session)};
requests.emplace([weak_ptr, context{std::move(context)}]() {
if (auto strong_ptr = weak_ptr.lock()) {
strong_ptr->CompleteSyncRequest(*context);
}
});
}
condition.notify_one();
}
ServiceThread::Impl::~Impl() {
{
std::unique_lock lock{queue_mutex};
stop = true;
}
condition.notify_all();
for (std::thread& thread : threads) {
thread.join();
}
}
ServiceThread::ServiceThread(KernelCore& kernel, std::size_t num_threads, const std::string& name)
: impl{std::make_unique<Impl>(kernel, num_threads, name)} {}
ServiceThread::~ServiceThread() = default;
void ServiceThread::QueueSyncRequest(ServerSession& session,
std::shared_ptr<HLERequestContext>&& context) {
impl->QueueSyncRequest(session, std::move(context));
}
} // namespace Kernel

View File

@@ -0,0 +1,28 @@
// Copyright 2020 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <string>
namespace Kernel {
class HLERequestContext;
class KernelCore;
class ServerSession;
class ServiceThread final {
public:
explicit ServiceThread(KernelCore& kernel, std::size_t num_threads, const std::string& name);
~ServiceThread();
void QueueSyncRequest(ServerSession& session, std::shared_ptr<HLERequestContext>&& context);
private:
class Impl;
std::unique_ptr<Impl> impl;
};
} // namespace Kernel

View File

@@ -1583,7 +1583,7 @@ static void ExitThread32(Core::System& system) {
/// Sleep the current thread
static void SleepThread(Core::System& system, s64 nanoseconds) {
LOG_DEBUG(Kernel_SVC, "called nanoseconds={}", nanoseconds);
LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds);
enum class SleepType : s64 {
YieldWithoutCoreMigration = 0,

View File

@@ -23,8 +23,8 @@ enum class MemoryState : u32 {
Ipc = 0x0A,
Stack = 0x0B,
ThreadLocal = 0x0C,
Transfered = 0x0D,
SharedTransfered = 0x0E,
Transferred = 0x0D,
SharedTransferred = 0x0E,
SharedCode = 0x0F,
Inaccessible = 0x10,
NonSecureIpc = 0x11,

View File

@@ -560,14 +560,14 @@ void ISelfController::GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequest
AppletMessageQueue::AppletMessageQueue(Kernel::KernelCore& kernel) {
on_new_message =
Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OnMessageRecieved");
Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OnMessageReceived");
on_operation_mode_changed =
Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OperationModeChanged");
}
AppletMessageQueue::~AppletMessageQueue() = default;
const std::shared_ptr<Kernel::ReadableEvent>& AppletMessageQueue::GetMesssageRecieveEvent() const {
const std::shared_ptr<Kernel::ReadableEvent>& AppletMessageQueue::GetMessageReceiveEvent() const {
return on_new_message.readable;
}
@@ -675,7 +675,7 @@ void ICommonStateGetter::GetEventHandle(Kernel::HLERequestContext& ctx) {
IPC::ResponseBuilder rb{ctx, 2, 1};
rb.Push(RESULT_SUCCESS);
rb.PushCopyObjects(msg_queue->GetMesssageRecieveEvent());
rb.PushCopyObjects(msg_queue->GetMessageReceiveEvent());
}
void ICommonStateGetter::ReceiveMessage(Kernel::HLERequestContext& ctx) {

View File

@@ -55,7 +55,7 @@ public:
explicit AppletMessageQueue(Kernel::KernelCore& kernel);
~AppletMessageQueue();
const std::shared_ptr<Kernel::ReadableEvent>& GetMesssageRecieveEvent() const;
const std::shared_ptr<Kernel::ReadableEvent>& GetMessageReceiveEvent() const;
const std::shared_ptr<Kernel::ReadableEvent>& GetOperationModeChangedEvent() const;
void PushMessage(AppletMessage msg);
AppletMessage PopMessage();

View File

@@ -70,8 +70,10 @@ public:
Kernel::WritableEvent::CreateEventPair(system.Kernel(), "IAudioOutBufferReleased");
stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate,
audio_params.channel_count, std::move(unique_name),
[this] { buffer_event.writable->Signal(); });
audio_params.channel_count, std::move(unique_name), [this] {
const auto guard = LockService();
buffer_event.writable->Signal();
});
}
private:

View File

@@ -49,16 +49,16 @@ public:
system_event =
Kernel::WritableEvent::CreateEventPair(system.Kernel(), "IAudioRenderer:SystemEvent");
renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), system.Memory(),
audren_params, system_event.writable,
instance_number);
renderer = std::make_unique<AudioCore::AudioRenderer>(
system.CoreTiming(), system.Memory(), audren_params,
[this]() {
const auto guard = LockService();
system_event.writable->Signal();
},
instance_number);
}
private:
void UpdateAudioCallback() {
system_event.writable->Signal();
}
void GetSampleRate(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "called");

View File

@@ -78,11 +78,13 @@ IAppletResource::IAppletResource(Core::System& system_)
pad_update_event = Core::Timing::CreateEvent(
"HID::UpdatePadCallback",
[this](std::uintptr_t user_data, std::chrono::nanoseconds ns_late) {
const auto guard = LockService();
UpdateControllers(user_data, ns_late);
});
motion_update_event = Core::Timing::CreateEvent(
"HID::MotionPadCallback",
[this](std::uintptr_t user_data, std::chrono::nanoseconds ns_late) {
const auto guard = LockService();
UpdateMotion(user_data, ns_late);
});

View File

@@ -31,8 +31,8 @@ public:
* @param output A buffer where the output data will be written.
* @returns The result code of the ioctl.
*/
virtual NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
IoctlCtrl& ctrl) = 0;
virtual NvResult Ioctl1(Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output) = 0;
/**
* Handles an ioctl2 request.
@@ -43,8 +43,7 @@ public:
* @returns The result code of the ioctl.
*/
virtual NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output,
IoctlCtrl& ctrl) = 0;
const std::vector<u8>& inline_input, std::vector<u8>& output) = 0;
/**
* Handles an ioctl3 request.
@@ -55,7 +54,7 @@ public:
* @returns The result code of the ioctl.
*/
virtual NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output, IoctlCtrl& ctrl) = 0;
std::vector<u8>& inline_output) = 0;
protected:
Core::System& system;

View File

@@ -18,21 +18,20 @@ nvdisp_disp0::nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_de
: nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
nvdisp_disp0 ::~nvdisp_disp0() = default;
NvResult nvdisp_disp0::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
IoctlCtrl& ctrl) {
NvResult nvdisp_disp0::Ioctl1(Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvdisp_disp0::Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output,
IoctlCtrl& ctrl) {
const std::vector<u8>& inline_input, std::vector<u8>& output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvdisp_disp0::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output, IoctlCtrl& ctrl) {
std::vector<u8>& inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}

View File

@@ -20,13 +20,11 @@ public:
explicit nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
~nvdisp_disp0() override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
IoctlCtrl& ctrl) override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output,
IoctlCtrl& ctrl) override;
const std::vector<u8>& inline_input, std::vector<u8>& output) override;
NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output, IoctlCtrl& ctrl) override;
std::vector<u8>& inline_output) override;
/// Performs a screen flip, drawing the buffer pointed to by the handle.
void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,

View File

@@ -21,8 +21,8 @@ nvhost_as_gpu::nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_
: nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
nvhost_as_gpu::~nvhost_as_gpu() = default;
NvResult nvhost_as_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
IoctlCtrl& ctrl) {
NvResult nvhost_as_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output) {
switch (command.group) {
case 'A':
switch (command.cmd) {
@@ -55,14 +55,13 @@ NvResult nvhost_as_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std:
}
NvResult nvhost_as_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output,
IoctlCtrl& ctrl) {
const std::vector<u8>& inline_input, std::vector<u8>& output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_as_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output, IoctlCtrl& ctrl) {
std::vector<u8>& inline_output) {
switch (command.group) {
case 'A':
switch (command.cmd) {

View File

@@ -30,13 +30,11 @@ public:
explicit nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
~nvhost_as_gpu() override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
IoctlCtrl& ctrl) override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output,
IoctlCtrl& ctrl) override;
const std::vector<u8>& inline_input, std::vector<u8>& output) override;
NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output, IoctlCtrl& ctrl) override;
std::vector<u8>& inline_output) override;
private:
class BufferMap final {

View File

@@ -20,8 +20,7 @@ nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface,
: nvdevice(system), events_interface{events_interface}, syncpoint_manager{syncpoint_manager} {}
nvhost_ctrl::~nvhost_ctrl() = default;
NvResult nvhost_ctrl::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
IoctlCtrl& ctrl) {
NvResult nvhost_ctrl::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
switch (command.group) {
case 0x0:
switch (command.cmd) {
@@ -30,9 +29,9 @@ NvResult nvhost_ctrl::Ioctl1(Ioctl command, const std::vector<u8>& input, std::v
case 0x1c:
return IocCtrlClearEventWait(input, output);
case 0x1d:
return IocCtrlEventWait(input, output, false, ctrl);
return IocCtrlEventWait(input, output, false);
case 0x1e:
return IocCtrlEventWait(input, output, true, ctrl);
return IocCtrlEventWait(input, output, true);
case 0x1f:
return IocCtrlEventRegister(input, output);
case 0x20:
@@ -48,14 +47,13 @@ NvResult nvhost_ctrl::Ioctl1(Ioctl command, const std::vector<u8>& input, std::v
}
NvResult nvhost_ctrl::Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output,
IoctlCtrl& ctrl) {
const std::vector<u8>& inline_input, std::vector<u8>& output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_ctrl::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output, IoctlCtrl& ctrl) {
std::vector<u8>& inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
@@ -69,7 +67,7 @@ NvResult nvhost_ctrl::NvOsGetConfigU32(const std::vector<u8>& input, std::vector
}
NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output,
bool is_async, IoctlCtrl& ctrl) {
bool is_async) {
IocCtrlEventWaitParams params{};
std::memcpy(&params, input.data(), sizeof(params));
LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_async={}",
@@ -141,12 +139,6 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector
params.value |= event_id;
event.event.writable->Clear();
gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
if (!is_async && ctrl.fresh_call) {
ctrl.must_delay = true;
ctrl.timeout = params.timeout;
ctrl.event_id = event_id;
return NvResult::Timeout;
}
std::memcpy(output.data(), &params, sizeof(params));
return NvResult::Timeout;
}

View File

@@ -18,13 +18,11 @@ public:
SyncpointManager& syncpoint_manager);
~nvhost_ctrl() override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
IoctlCtrl& ctrl) override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output,
IoctlCtrl& ctrl) override;
const std::vector<u8>& inline_input, std::vector<u8>& output) override;
NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output, IoctlCtrl& ctrl) override;
std::vector<u8>& inline_output) override;
private:
struct IocSyncptReadParams {
@@ -123,8 +121,7 @@ private:
static_assert(sizeof(IocCtrlEventKill) == 8, "IocCtrlEventKill is incorrect size");
NvResult NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output);
NvResult IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async,
IoctlCtrl& ctrl);
NvResult IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async);
NvResult IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output);
NvResult IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output);
NvResult IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output);

View File

@@ -16,7 +16,7 @@ nvhost_ctrl_gpu::nvhost_ctrl_gpu(Core::System& system) : nvdevice(system) {}
nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default;
NvResult nvhost_ctrl_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output, IoctlCtrl& ctrl) {
std::vector<u8>& output) {
switch (command.group) {
case 'G':
switch (command.cmd) {
@@ -48,15 +48,13 @@ NvResult nvhost_ctrl_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input,
}
NvResult nvhost_ctrl_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output,
IoctlCtrl& ctrl) {
const std::vector<u8>& inline_input, std::vector<u8>& output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_ctrl_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output, std::vector<u8>& inline_output,
IoctlCtrl& ctrl) {
std::vector<u8>& output, std::vector<u8>& inline_output) {
switch (command.group) {
case 'G':
switch (command.cmd) {

View File

@@ -16,13 +16,11 @@ public:
explicit nvhost_ctrl_gpu(Core::System& system);
~nvhost_ctrl_gpu() override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
IoctlCtrl& ctrl) override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output,
IoctlCtrl& ctrl) override;
const std::vector<u8>& inline_input, std::vector<u8>& output) override;
NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output, IoctlCtrl& ctrl) override;
std::vector<u8>& inline_output) override;
private:
struct IoctlGpuCharacteristics {

View File

@@ -23,8 +23,7 @@ nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
nvhost_gpu::~nvhost_gpu() = default;
NvResult nvhost_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
IoctlCtrl& ctrl) {
NvResult nvhost_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
switch (command.group) {
case 0x0:
switch (command.cmd) {
@@ -76,8 +75,7 @@ NvResult nvhost_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std::ve
};
NvResult nvhost_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output,
IoctlCtrl& ctrl) {
const std::vector<u8>& inline_input, std::vector<u8>& output) {
switch (command.group) {
case 'H':
switch (command.cmd) {
@@ -91,7 +89,7 @@ NvResult nvhost_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input,
}
NvResult nvhost_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output, IoctlCtrl& ctrl) {
std::vector<u8>& inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}

View File

@@ -26,13 +26,11 @@ public:
SyncpointManager& syncpoint_manager);
~nvhost_gpu() override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
IoctlCtrl& ctrl) override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output,
IoctlCtrl& ctrl) override;
const std::vector<u8>& inline_input, std::vector<u8>& output) override;
NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output, IoctlCtrl& ctrl) override;
std::vector<u8>& inline_output) override;
private:
enum class CtxObjects : u32_le {

View File

@@ -15,8 +15,8 @@ nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_de
: nvhost_nvdec_common(system, std::move(nvmap_dev)) {}
nvhost_nvdec::~nvhost_nvdec() = default;
NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
IoctlCtrl& ctrl) {
NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output) {
switch (command.group) {
case 0x0:
switch (command.cmd) {
@@ -58,14 +58,13 @@ NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input, std::
}
NvResult nvhost_nvdec::Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output,
IoctlCtrl& ctrl) {
const std::vector<u8>& inline_input, std::vector<u8>& output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_nvdec::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output, IoctlCtrl& ctrl) {
std::vector<u8>& inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}

View File

@@ -14,13 +14,11 @@ public:
explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
~nvhost_nvdec() override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
IoctlCtrl& ctrl) override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output,
IoctlCtrl& ctrl) override;
const std::vector<u8>& inline_input, std::vector<u8>& output) override;
NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output, IoctlCtrl& ctrl) override;
std::vector<u8>& inline_output) override;
};
} // namespace Service::Nvidia::Devices

View File

@@ -13,8 +13,8 @@ namespace Service::Nvidia::Devices {
nvhost_nvjpg::nvhost_nvjpg(Core::System& system) : nvdevice(system) {}
nvhost_nvjpg::~nvhost_nvjpg() = default;
NvResult nvhost_nvjpg::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
IoctlCtrl& ctrl) {
NvResult nvhost_nvjpg::Ioctl1(Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output) {
switch (command.group) {
case 'H':
switch (command.cmd) {
@@ -33,14 +33,13 @@ NvResult nvhost_nvjpg::Ioctl1(Ioctl command, const std::vector<u8>& input, std::
}
NvResult nvhost_nvjpg::Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output,
IoctlCtrl& ctrl) {
const std::vector<u8>& inline_input, std::vector<u8>& output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_nvjpg::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output, IoctlCtrl& ctrl) {
std::vector<u8>& inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}

View File

@@ -16,13 +16,11 @@ public:
explicit nvhost_nvjpg(Core::System& system);
~nvhost_nvjpg() override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
IoctlCtrl& ctrl) override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output,
IoctlCtrl& ctrl) override;
const std::vector<u8>& inline_input, std::vector<u8>& output) override;
NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output, IoctlCtrl& ctrl) override;
std::vector<u8>& inline_output) override;
private:
struct IoctlSetNvmapFD {

View File

@@ -15,8 +15,7 @@ nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
nvhost_vic::~nvhost_vic() = default;
NvResult nvhost_vic::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
IoctlCtrl& ctrl) {
NvResult nvhost_vic::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
switch (command.group) {
case 0x0:
switch (command.cmd) {
@@ -51,14 +50,13 @@ NvResult nvhost_vic::Ioctl1(Ioctl command, const std::vector<u8>& input, std::ve
}
NvResult nvhost_vic::Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output,
IoctlCtrl& ctrl) {
const std::vector<u8>& inline_input, std::vector<u8>& output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvhost_vic::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output, IoctlCtrl& ctrl) {
std::vector<u8>& inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}

View File

@@ -14,12 +14,10 @@ public:
explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
~nvhost_vic();
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
IoctlCtrl& ctrl) override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output,
IoctlCtrl& ctrl) override;
const std::vector<u8>& inline_input, std::vector<u8>& output) override;
NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output, IoctlCtrl& ctrl) override;
std::vector<u8>& inline_output) override;
};
} // namespace Service::Nvidia::Devices

View File

@@ -19,8 +19,7 @@ nvmap::nvmap(Core::System& system) : nvdevice(system) {
nvmap::~nvmap() = default;
NvResult nvmap::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
IoctlCtrl& ctrl) {
NvResult nvmap::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
switch (command.group) {
case 0x1:
switch (command.cmd) {
@@ -49,14 +48,13 @@ NvResult nvmap::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<
}
NvResult nvmap::Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output,
IoctlCtrl& ctrl) {
const std::vector<u8>& inline_input, std::vector<u8>& output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}
NvResult nvmap::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output, IoctlCtrl& ctrl) {
std::vector<u8>& inline_output) {
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
return NvResult::NotImplemented;
}

View File

@@ -19,13 +19,11 @@ public:
explicit nvmap(Core::System& system);
~nvmap() override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
IoctlCtrl& ctrl) override;
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output,
IoctlCtrl& ctrl) override;
const std::vector<u8>& inline_input, std::vector<u8>& output) override;
NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& inline_output, IoctlCtrl& ctrl) override;
std::vector<u8>& inline_output) override;
/// Returns the allocated address of an nvmap object given its handle.
VAddr GetObjectAddress(u32 handle) const;

View File

@@ -61,32 +61,9 @@ void NVDRV::Ioctl1(Kernel::HLERequestContext& ctx) {
std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0));
const auto input_buffer = ctx.ReadBuffer(0);
IoctlCtrl ctrl{};
const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer, ctrl);
if (ctrl.must_delay) {
ctrl.fresh_call = false;
ctx.SleepClientThread(
"NVServices::DelayedResponse", ctrl.timeout,
[=, this](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx_,
Kernel::ThreadWakeupReason reason) {
IoctlCtrl ctrl2{ctrl};
std::vector<u8> tmp_output = output_buffer;
const auto nv_result2 = nvdrv->Ioctl1(fd, command, input_buffer, tmp_output, ctrl2);
if (command.is_out != 0) {
ctx.WriteBuffer(tmp_output);
}
IPC::ResponseBuilder rb{ctx_, 3};
rb.Push(RESULT_SUCCESS);
rb.PushEnum(nv_result2);
},
nvdrv->GetEventWriteable(ctrl.event_id));
} else {
if (command.is_out != 0) {
ctx.WriteBuffer(output_buffer);
}
const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer);
if (command.is_out != 0) {
ctx.WriteBuffer(output_buffer);
}
IPC::ResponseBuilder rb{ctx, 3};
@@ -110,36 +87,8 @@ void NVDRV::Ioctl2(Kernel::HLERequestContext& ctx) {
const auto input_inlined_buffer = ctx.ReadBuffer(1);
std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0));
IoctlCtrl ctrl{};
const auto nv_result =
nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer, ctrl);
if (ctrl.must_delay) {
ctrl.fresh_call = false;
ctx.SleepClientThread(
"NVServices::DelayedResponse", ctrl.timeout,
[=, this](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx_,
Kernel::ThreadWakeupReason reason) {
IoctlCtrl ctrl2{ctrl};
std::vector<u8> tmp_output = output_buffer;
const auto nv_result2 = nvdrv->Ioctl2(fd, command, input_buffer,
input_inlined_buffer, tmp_output, ctrl2);
if (command.is_out != 0) {
ctx.WriteBuffer(tmp_output);
}
IPC::ResponseBuilder rb{ctx_, 3};
rb.Push(RESULT_SUCCESS);
rb.PushEnum(nv_result2);
},
nvdrv->GetEventWriteable(ctrl.event_id));
} else {
if (command.is_out != 0) {
ctx.WriteBuffer(output_buffer);
}
}
nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer);
if (command.is_out != 0) {
ctx.WriteBuffer(output_buffer);
}
@@ -165,36 +114,11 @@ void NVDRV::Ioctl3(Kernel::HLERequestContext& ctx) {
std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0));
std::vector<u8> output_buffer_inline(ctx.GetWriteBufferSize(1));
IoctlCtrl ctrl{};
const auto nv_result =
nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, output_buffer_inline, ctrl);
if (ctrl.must_delay) {
ctrl.fresh_call = false;
ctx.SleepClientThread(
"NVServices::DelayedResponse", ctrl.timeout,
[=, this](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx_,
Kernel::ThreadWakeupReason reason) {
IoctlCtrl ctrl2{ctrl};
std::vector<u8> tmp_output = output_buffer;
std::vector<u8> tmp_output2 = output_buffer;
const auto nv_result2 =
nvdrv->Ioctl3(fd, command, input_buffer, tmp_output, tmp_output2, ctrl2);
if (command.is_out != 0) {
ctx.WriteBuffer(tmp_output, 0);
ctx.WriteBuffer(tmp_output2, 1);
}
IPC::ResponseBuilder rb{ctx_, 3};
rb.Push(RESULT_SUCCESS);
rb.PushEnum(nv_result2);
},
nvdrv->GetEventWriteable(ctrl.event_id));
} else {
if (command.is_out != 0) {
ctx.WriteBuffer(output_buffer, 0);
ctx.WriteBuffer(output_buffer_inline, 1);
}
nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, output_buffer_inline);
if (command.is_out != 0) {
ctx.WriteBuffer(output_buffer, 0);
ctx.WriteBuffer(output_buffer_inline, 1);
}
IPC::ResponseBuilder rb{ctx, 3};

View File

@@ -97,15 +97,4 @@ union Ioctl {
BitField<31, 1, u32> is_out;
};
struct IoctlCtrl {
// First call made to the service, for services that call themselves again after a call.
bool fresh_call{true};
// Tells the Ioctl Wrapper that it must delay the IPC response and send the thread to sleep
bool must_delay{};
// Timeout for the delay
s64 timeout{};
// NV Event Id
s32 event_id{-1};
};
} // namespace Service::Nvidia
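The is_out checks in the wrappers above come from the Ioctl bitfield union, of which only bit 31 is visible in this hunk. A plain-C++ sketch of reading such a flag from a raw command word (no claim is made here about the positions of the other fields):

#include <cstdint>

// Returns true when bit 31 of the raw ioctl command word is set,
// i.e. the command expects output data to be written back to the client.
constexpr bool IsOutCommand(std::uint32_t raw) {
    return ((raw >> 31) & 1u) != 0;
}

static_assert(IsOutCommand(0x80000000u));
static_assert(!IsOutCommand(0x7FFFFFFFu));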

View File

@@ -91,7 +91,7 @@ DeviceFD Module::Open(const std::string& device_name) {
}
NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output, IoctlCtrl& ctrl) {
std::vector<u8>& output) {
if (fd < 0) {
LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
return NvResult::InvalidState;
@@ -104,12 +104,11 @@ NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input
return NvResult::NotImplemented;
}
return itr->second->Ioctl1(command, input, output, ctrl);
return itr->second->Ioctl1(command, input, output);
}
NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output,
IoctlCtrl& ctrl) {
const std::vector<u8>& inline_input, std::vector<u8>& output) {
if (fd < 0) {
LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
return NvResult::InvalidState;
@@ -122,11 +121,11 @@ NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input
return NvResult::NotImplemented;
}
return itr->second->Ioctl2(command, input, inline_input, output, ctrl);
return itr->second->Ioctl2(command, input, inline_input, output);
}
NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output, std::vector<u8>& inline_output, IoctlCtrl& ctrl) {
std::vector<u8>& output, std::vector<u8>& inline_output) {
if (fd < 0) {
LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
return NvResult::InvalidState;
@@ -139,7 +138,7 @@ NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input
return NvResult::NotImplemented;
}
return itr->second->Ioctl3(command, input, output, inline_output, ctrl);
return itr->second->Ioctl3(command, input, output, inline_output);
}
NvResult Module::Close(DeviceFD fd) {

View File

@@ -119,13 +119,13 @@ public:
/// Sends an ioctl command to the specified file descriptor.
NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output, IoctlCtrl& ctrl);
std::vector<u8>& output);
NvResult Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
const std::vector<u8>& inline_input, std::vector<u8>& output, IoctlCtrl& ctrl);
const std::vector<u8>& inline_input, std::vector<u8>& output);
NvResult Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
std::vector<u8>& output, std::vector<u8>& inline_output, IoctlCtrl& ctrl);
std::vector<u8>& output, std::vector<u8>& inline_output);
/// Closes a device file descriptor and returns operation success.
NvResult Close(DeviceFD fd);

View File

@@ -25,7 +25,12 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
ASSERT(slot < buffer_slots);
LOG_WARNING(Service, "Adding graphics buffer {}", slot);
free_buffers.push_back(slot);
{
std::unique_lock lock{queue_mutex};
free_buffers.push_back(slot);
}
condition.notify_one();
buffers[slot] = {
.slot = slot,
.status = Buffer::Status::Free,
@@ -41,10 +46,20 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width,
u32 height) {
// Wait for first request before trying to dequeue
{
std::unique_lock lock{queue_mutex};
condition.wait(lock, [this] { return !free_buffers.empty() || !is_connect; });
}
if (free_buffers.empty()) {
if (!is_connect) {
// Buffer was disconnected while the thread was blocked; this is most likely due to
// emulation being stopped
return std::nullopt;
}
std::unique_lock lock{queue_mutex};
auto f_itr = free_buffers.begin();
auto slot = buffers.size();
@@ -97,7 +112,11 @@ void BufferQueue::CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& mult
buffers[slot].multi_fence = multi_fence;
buffers[slot].swap_interval = 0;
free_buffers.push_back(slot);
{
std::unique_lock lock{queue_mutex};
free_buffers.push_back(slot);
}
condition.notify_one();
buffer_wait_event.writable->Signal();
}
@@ -127,15 +146,26 @@ void BufferQueue::ReleaseBuffer(u32 slot) {
ASSERT(buffers[slot].slot == slot);
buffers[slot].status = Buffer::Status::Free;
free_buffers.push_back(slot);
{
std::unique_lock lock{queue_mutex};
free_buffers.push_back(slot);
}
condition.notify_one();
buffer_wait_event.writable->Signal();
}
void BufferQueue::Connect() {
queue_sequence.clear();
is_connect = true;
}
void BufferQueue::Disconnect() {
buffers.fill({});
queue_sequence.clear();
buffer_wait_event.writable->Signal();
is_connect = false;
condition.notify_one();
}
u32 BufferQueue::Query(QueryType type) {

View File

@@ -4,7 +4,9 @@
#pragma once
#include <condition_variable>
#include <list>
#include <mutex>
#include <optional>
#include <vector>
@@ -99,6 +101,7 @@ public:
void CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& multi_fence);
std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
void ReleaseBuffer(u32 slot);
void Connect();
void Disconnect();
u32 Query(QueryType type);
@@ -106,18 +109,28 @@ public:
return id;
}
bool IsConnected() const {
return is_connect;
}
std::shared_ptr<Kernel::WritableEvent> GetWritableBufferWaitEvent() const;
std::shared_ptr<Kernel::ReadableEvent> GetBufferWaitEvent() const;
private:
u32 id;
u64 layer_id;
BufferQueue(const BufferQueue&) = delete;
u32 id{};
u64 layer_id{};
std::atomic_bool is_connect{};
std::list<u32> free_buffers;
std::array<Buffer, buffer_slots> buffers;
std::list<u32> queue_sequence;
Kernel::EventPair buffer_wait_event;
std::mutex queue_mutex;
std::condition_variable condition;
};
} // namespace Service::NVFlinger
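The new queue_mutex/condition pair turns DequeueBuffer into a blocking call that wakes up either when a free slot is pushed or when the queue is disconnected. Below is a reduced sketch of that wait/notify shape, using illustrative names (SlotQueue, PushFree, PopFree) that are not part of the codebase:

#include <condition_variable>
#include <list>
#include <mutex>
#include <optional>

class SlotQueue {
public:
    void PushFree(unsigned slot) {
        {
            std::unique_lock lock{queue_mutex};
            free_slots.push_back(slot);
        }
        condition.notify_one();
    }

    // Blocks until a slot is free or the queue is disconnected; returns nullopt on disconnect.
    std::optional<unsigned> PopFree() {
        std::unique_lock lock{queue_mutex};
        condition.wait(lock, [this] { return !free_slots.empty() || !is_connected; });
        if (free_slots.empty()) {
            return std::nullopt; // Disconnected, e.g. emulation is shutting down
        }
        const unsigned slot = free_slots.front();
        free_slots.pop_front();
        return slot;
    }

    void Disconnect() {
        {
            std::unique_lock lock{queue_mutex};
            is_connected = false;
        }
        condition.notify_all();
    }

private:
    std::list<unsigned> free_slots;
    bool is_connected = true;
    std::mutex queue_mutex;
    std::condition_variable condition;
};

The diff above waits in one scope and re-locks in a second block before touching free_buffers; the sketch holds a single lock for simplicity.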

View File

@@ -88,6 +88,10 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) {
}
NVFlinger::~NVFlinger() {
for (auto& buffer_queue : buffer_queues) {
buffer_queue->Disconnect();
}
if (system.IsMulticore()) {
is_running = false;
wait_event->Set();
@@ -104,6 +108,8 @@ void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
}
std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
const auto guard = Lock();
LOG_DEBUG(Service, "Opening \"{}\" display", name);
// TODO(Subv): Currently we only support the Default display.
@@ -121,6 +127,7 @@ std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
}
std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
const auto guard = Lock();
auto* const display = FindDisplay(display_id);
if (display == nullptr) {
@@ -129,18 +136,22 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
const u64 layer_id = next_layer_id++;
const u32 buffer_queue_id = next_buffer_queue_id++;
buffer_queues.emplace_back(system.Kernel(), buffer_queue_id, layer_id);
display->CreateLayer(layer_id, buffer_queues.back());
buffer_queues.emplace_back(
std::make_unique<BufferQueue>(system.Kernel(), buffer_queue_id, layer_id));
display->CreateLayer(layer_id, *buffer_queues.back());
return layer_id;
}
void NVFlinger::CloseLayer(u64 layer_id) {
const auto guard = Lock();
for (auto& display : displays) {
display.CloseLayer(layer_id);
}
}
std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const {
const auto guard = Lock();
const auto* const layer = FindLayer(display_id, layer_id);
if (layer == nullptr) {
@@ -151,6 +162,7 @@ std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) co
}
std::shared_ptr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const {
const auto guard = Lock();
auto* const display = FindDisplay(display_id);
if (display == nullptr) {
@@ -160,20 +172,16 @@ std::shared_ptr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id)
return display->GetVSyncEvent();
}
BufferQueue& NVFlinger::FindBufferQueue(u32 id) {
BufferQueue* NVFlinger::FindBufferQueue(u32 id) {
const auto guard = Lock();
const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
[id](const auto& queue) { return queue.GetId() == id; });
[id](const auto& queue) { return queue->GetId() == id; });
ASSERT(itr != buffer_queues.end());
return *itr;
}
if (itr == buffer_queues.end()) {
return nullptr;
}
const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const {
const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
[id](const auto& queue) { return queue.GetId() == id; });
ASSERT(itr != buffer_queues.end());
return *itr;
return itr->get();
}
VI::Display* NVFlinger::FindDisplay(u64 display_id) {

View File

@@ -75,10 +75,7 @@ public:
[[nodiscard]] std::shared_ptr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const;
/// Obtains a buffer queue identified by the ID.
[[nodiscard]] BufferQueue& FindBufferQueue(u32 id);
/// Obtains a buffer queue identified by the ID.
[[nodiscard]] const BufferQueue& FindBufferQueue(u32 id) const;
[[nodiscard]] BufferQueue* FindBufferQueue(u32 id);
/// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
/// finished.
@@ -86,11 +83,11 @@ public:
[[nodiscard]] s64 GetNextTicks() const;
private:
[[nodiscard]] std::unique_lock<std::mutex> Lock() const {
return std::unique_lock{*guard};
}
private:
/// Finds the display identified by the specified ID.
[[nodiscard]] VI::Display* FindDisplay(u64 display_id);
@@ -110,7 +107,7 @@ private:
std::shared_ptr<Nvidia::Module> nvdrv;
std::vector<VI::Display> displays;
std::vector<BufferQueue> buffer_queues;
std::vector<std::unique_ptr<BufferQueue>> buffer_queues;
/// ID to use for the next layer that is created; this counter is shared among all displays.
u64 next_layer_id = 1;

View File

@@ -48,7 +48,7 @@ public:
class PCIe final : public ServiceFramework<PCIe> {
public:
explicit PCIe(Core::System& system_) : ServiceFramework{system, "pcie"} {
explicit PCIe(Core::System& system_) : ServiceFramework{system_, "pcie"} {
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "RegisterClassDriver"},

View File

@@ -95,9 +95,14 @@ ServiceFrameworkBase::ServiceFrameworkBase(Core::System& system_, const char* se
: system{system_}, service_name{service_name_}, max_sessions{max_sessions_},
handler_invoker{handler_invoker_} {}
ServiceFrameworkBase::~ServiceFrameworkBase() = default;
ServiceFrameworkBase::~ServiceFrameworkBase() {
// Wait for other threads to release access before destroying
const auto guard = LockService();
}
void ServiceFrameworkBase::InstallAsService(SM::ServiceManager& service_manager) {
const auto guard = LockService();
ASSERT(!port_installed);
auto port = service_manager.RegisterService(service_name, max_sessions).Unwrap();
@@ -106,6 +111,8 @@ void ServiceFrameworkBase::InstallAsService(SM::ServiceManager& service_manager)
}
void ServiceFrameworkBase::InstallAsNamedPort(Kernel::KernelCore& kernel) {
const auto guard = LockService();
ASSERT(!port_installed);
auto [server_port, client_port] =
@@ -115,17 +122,6 @@ void ServiceFrameworkBase::InstallAsNamedPort(Kernel::KernelCore& kernel) {
port_installed = true;
}
std::shared_ptr<Kernel::ClientPort> ServiceFrameworkBase::CreatePort(Kernel::KernelCore& kernel) {
ASSERT(!port_installed);
auto [server_port, client_port] =
Kernel::ServerPort::CreatePortPair(kernel, max_sessions, service_name);
auto port = MakeResult(std::move(server_port)).Unwrap();
port->SetHleHandler(shared_from_this());
port_installed = true;
return client_port;
}
void ServiceFrameworkBase::RegisterHandlersBase(const FunctionInfoBase* functions, std::size_t n) {
handlers.reserve(handlers.size() + n);
for (std::size_t i = 0; i < n; ++i) {
@@ -164,6 +160,8 @@ void ServiceFrameworkBase::InvokeRequest(Kernel::HLERequestContext& ctx) {
}
ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& context) {
const auto guard = LockService();
switch (context.GetCommandType()) {
case IPC::CommandType::Close: {
IPC::ResponseBuilder rb{context, 2};
@@ -184,7 +182,11 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co
UNIMPLEMENTED_MSG("command_type={}", context.GetCommandType());
}
context.WriteToOutgoingCommandBuffer(context.GetThread());
// If emulation was shut down, we are closing service threads; do not write the response back to
// memory that may be shutting down as well.
if (system.IsPoweredOn()) {
context.WriteToOutgoingCommandBuffer(context.GetThread());
}
return RESULT_SUCCESS;
}

View File

@@ -5,9 +5,11 @@
#pragma once
#include <cstddef>
#include <mutex>
#include <string>
#include <boost/container/flat_map.hpp>
#include "common/common_types.h"
#include "common/spin_lock.h"
#include "core/hle/kernel/hle_ipc.h"
#include "core/hle/kernel/object.h"
@@ -68,11 +70,9 @@ public:
void InstallAsService(SM::ServiceManager& service_manager);
/// Creates a port pair and registers it on the kernel's global port registry.
void InstallAsNamedPort(Kernel::KernelCore& kernel);
/// Creates and returns an unregistered port for the service.
std::shared_ptr<Kernel::ClientPort> CreatePort(Kernel::KernelCore& kernel);
/// Invokes a service request routine.
void InvokeRequest(Kernel::HLERequestContext& ctx);
/// Handles a synchronization request for the service.
ResultCode HandleSyncRequest(Kernel::HLERequestContext& context) override;
protected:
@@ -80,6 +80,11 @@ protected:
template <typename Self>
using HandlerFnP = void (Self::*)(Kernel::HLERequestContext&);
/// Used to gain exclusive access to the service members, e.g. from CoreTiming thread.
[[nodiscard]] std::scoped_lock<Common::SpinLock> LockService() {
return std::scoped_lock{lock_service};
}
/// System context that the service operates under.
Core::System& system;
@@ -115,6 +120,9 @@ private:
/// Function used to safely up-cast pointers to the derived class before invoking a handler.
InvokerFn* handler_invoker;
boost::container::flat_map<u32, FunctionInfoBase> handlers;
/// Used to gain exclusive access to the service members, e.g. from CoreTiming thread.
Common::SpinLock lock_service;
};
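The LockService helper above returns the lock object by value so callers can write `const auto guard = LockService();` and get scope-bound exclusive access, as seen in the HID, audio, and HandleSyncRequest hunks. A minimal standalone sketch of that idiom (class and member names here are illustrative, not yuzu's):

#include <mutex>

class GuardedCounter {
public:
    void Increment() {
        const auto guard = Lock(); // Held until the end of this scope
        ++value;
    }

    int Get() {
        const auto guard = Lock();
        return value;
    }

private:
    // Returning std::scoped_lock by value relies on guaranteed copy elision (C++17):
    // the lock is acquired here and released when the caller's guard goes out of scope.
    [[nodiscard]] std::scoped_lock<std::mutex> Lock() {
        return std::scoped_lock{mutex};
    }

    std::mutex mutex;
    int value = 0;
};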
/**

View File

@@ -1,161 +0,0 @@
// Copyright 2020 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <atomic>
#include <memory>
#include <string>
#include <string_view>
#include <thread>
#include <variant>
#include <vector>
#include <fmt/format.h>
#include "common/assert.h"
#include "common/microprofile.h"
#include "common/thread.h"
#include "core/core.h"
#include "core/hle/kernel/hle_ipc.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/kernel/writable_event.h"
namespace Service::Sockets {
/**
* Worker abstraction to execute blocking calls on the host without blocking the guest thread
*
* @tparam Service Service where the work is executed
* @tparam Types Types of work to execute
*/
template <class Service, class... Types>
class BlockingWorker {
using This = BlockingWorker<Service, Types...>;
using WorkVariant = std::variant<std::monostate, Types...>;
public:
/// Create a new worker
static std::unique_ptr<This> Create(Core::System& system, Service* service,
std::string_view name) {
return std::unique_ptr<This>(new This(system, service, name));
}
~BlockingWorker() {
while (!is_available.load(std::memory_order_relaxed)) {
// Busy wait until work is finished
std::this_thread::yield();
}
// Monostate means to exit the thread
work = std::monostate{};
work_event.Set();
thread.join();
}
/**
* Try to capture the worker so that work can be sent to it afterwards
* @returns True when the worker has been successfully captured
*/
bool TryCapture() {
bool expected = true;
return is_available.compare_exchange_weak(expected, false, std::memory_order_relaxed,
std::memory_order_relaxed);
}
/**
* Send work to this worker abstraction
* @see TryCapture must be called before attempting to call this function
*/
template <class Work>
void SendWork(Work new_work) {
ASSERT_MSG(!is_available, "Trying to send work on a worker that's not captured");
work = std::move(new_work);
work_event.Set();
}
/// Generate a callback for @see SleepClientThread
template <class Work>
auto Callback() {
return [this](std::shared_ptr<Kernel::Thread>, Kernel::HLERequestContext& ctx,
Kernel::ThreadWakeupReason reason) {
ASSERT(reason == Kernel::ThreadWakeupReason::Signal);
std::get<Work>(work).Response(ctx);
is_available.store(true);
};
}
/// Get kernel event that will be signalled by the worker when the host operation finishes
std::shared_ptr<Kernel::WritableEvent> KernelEvent() const {
return kernel_event;
}
private:
explicit BlockingWorker(Core::System& system, Service* service, std::string_view name) {
auto pair = Kernel::WritableEvent::CreateEventPair(system.Kernel(), std::string(name));
kernel_event = std::move(pair.writable);
thread = std::thread([this, &system, service, name] { Run(system, service, name); });
}
void Run(Core::System& system, Service* service, std::string_view name) {
system.RegisterHostThread();
const std::string thread_name = fmt::format("yuzu:{}", name);
MicroProfileOnThreadCreate(thread_name.c_str());
Common::SetCurrentThreadName(thread_name.c_str());
bool keep_running = true;
while (keep_running) {
work_event.Wait();
const auto visit_fn = [service, &keep_running]<typename T>(T&& w) {
if constexpr (std::is_same_v<std::decay_t<T>, std::monostate>) {
keep_running = false;
} else {
w.Execute(service);
}
};
std::visit(visit_fn, work);
kernel_event->Signal();
}
}
std::thread thread;
WorkVariant work;
Common::Event work_event;
std::shared_ptr<Kernel::WritableEvent> kernel_event;
std::atomic_bool is_available{true};
};
template <class Service, class... Types>
class BlockingWorkerPool {
using Worker = BlockingWorker<Service, Types...>;
public:
explicit BlockingWorkerPool(Core::System& system_, Service* service_)
: system{system_}, service{service_} {}
/// Returns a captured worker thread, creating new ones if necessary
Worker* CaptureWorker() {
for (auto& worker : workers) {
if (worker->TryCapture()) {
return worker.get();
}
}
auto new_worker = Worker::Create(system, service, fmt::format("BSD:{}", workers.size()));
[[maybe_unused]] const bool success = new_worker->TryCapture();
ASSERT(success);
return workers.emplace_back(std::move(new_worker)).get();
}
private:
Core::System& system;
Service* const service;
std::vector<std::unique_ptr<Worker>> workers;
};
} // namespace Service::Sockets
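TryCapture in the removed worker relies on an atomic compare-exchange so that only one caller can claim an idle worker at a time. A standalone sketch of that claim/release handshake follows; the removed code uses compare_exchange_weak with relaxed ordering inside a retry loop, while the sketch uses the strong form with acquire/release for clarity (names are illustrative):

#include <atomic>

class Claimable {
public:
    // Returns true for exactly one concurrent caller while the object is available.
    bool TryCapture() {
        bool expected = true;
        return is_available.compare_exchange_strong(expected, false, std::memory_order_acquire,
                                                    std::memory_order_relaxed);
    }

    void Release() {
        is_available.store(true, std::memory_order_release);
    }

private:
    std::atomic_bool is_available{true};
};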

View File

@@ -178,13 +178,12 @@ void BSD::Poll(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service, "called. nfds={} timeout={}", nfds, timeout);
ExecuteWork(ctx, "BSD:Poll", timeout != 0,
PollWork{
.nfds = nfds,
.timeout = timeout,
.read_buffer = ctx.ReadBuffer(),
.write_buffer = std::vector<u8>(ctx.GetWriteBufferSize()),
});
ExecuteWork(ctx, PollWork{
.nfds = nfds,
.timeout = timeout,
.read_buffer = ctx.ReadBuffer(),
.write_buffer = std::vector<u8>(ctx.GetWriteBufferSize()),
});
}
void BSD::Accept(Kernel::HLERequestContext& ctx) {
@@ -193,11 +192,10 @@ void BSD::Accept(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service, "called. fd={}", fd);
ExecuteWork(ctx, "BSD:Accept", IsBlockingSocket(fd),
AcceptWork{
.fd = fd,
.write_buffer = std::vector<u8>(ctx.GetWriteBufferSize()),
});
ExecuteWork(ctx, AcceptWork{
.fd = fd,
.write_buffer = std::vector<u8>(ctx.GetWriteBufferSize()),
});
}
void BSD::Bind(Kernel::HLERequestContext& ctx) {
@@ -215,11 +213,10 @@ void BSD::Connect(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service, "called. fd={} addrlen={}", fd, ctx.GetReadBufferSize());
ExecuteWork(ctx, "BSD:Connect", IsBlockingSocket(fd),
ConnectWork{
.fd = fd,
.addr = ctx.ReadBuffer(),
});
ExecuteWork(ctx, ConnectWork{
.fd = fd,
.addr = ctx.ReadBuffer(),
});
}
void BSD::GetPeerName(Kernel::HLERequestContext& ctx) {
@@ -327,12 +324,11 @@ void BSD::Recv(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service, "called. fd={} flags=0x{:x} len={}", fd, flags, ctx.GetWriteBufferSize());
ExecuteWork(ctx, "BSD:Recv", IsBlockingSocket(fd),
RecvWork{
.fd = fd,
.flags = flags,
.message = std::vector<u8>(ctx.GetWriteBufferSize()),
});
ExecuteWork(ctx, RecvWork{
.fd = fd,
.flags = flags,
.message = std::vector<u8>(ctx.GetWriteBufferSize()),
});
}
void BSD::RecvFrom(Kernel::HLERequestContext& ctx) {
@@ -344,13 +340,12 @@ void BSD::RecvFrom(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service, "called. fd={} flags=0x{:x} len={} addrlen={}", fd, flags,
ctx.GetWriteBufferSize(0), ctx.GetWriteBufferSize(1));
ExecuteWork(ctx, "BSD:RecvFrom", IsBlockingSocket(fd),
RecvFromWork{
.fd = fd,
.flags = flags,
.message = std::vector<u8>(ctx.GetWriteBufferSize(0)),
.addr = std::vector<u8>(ctx.GetWriteBufferSize(1)),
});
ExecuteWork(ctx, RecvFromWork{
.fd = fd,
.flags = flags,
.message = std::vector<u8>(ctx.GetWriteBufferSize(0)),
.addr = std::vector<u8>(ctx.GetWriteBufferSize(1)),
});
}
void BSD::Send(Kernel::HLERequestContext& ctx) {
@@ -361,12 +356,11 @@ void BSD::Send(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service, "called. fd={} flags=0x{:x} len={}", fd, flags, ctx.GetReadBufferSize());
ExecuteWork(ctx, "BSD:Send", IsBlockingSocket(fd),
SendWork{
.fd = fd,
.flags = flags,
.message = ctx.ReadBuffer(),
});
ExecuteWork(ctx, SendWork{
.fd = fd,
.flags = flags,
.message = ctx.ReadBuffer(),
});
}
void BSD::SendTo(Kernel::HLERequestContext& ctx) {
@@ -377,13 +371,12 @@ void BSD::SendTo(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service, "called. fd={} flags=0x{} len={} addrlen={}", fd, flags,
ctx.GetReadBufferSize(0), ctx.GetReadBufferSize(1));
ExecuteWork(ctx, "BSD:SendTo", IsBlockingSocket(fd),
SendToWork{
.fd = fd,
.flags = flags,
.message = ctx.ReadBuffer(0),
.addr = ctx.ReadBuffer(1),
});
ExecuteWork(ctx, SendToWork{
.fd = fd,
.flags = flags,
.message = ctx.ReadBuffer(0),
.addr = ctx.ReadBuffer(1),
});
}
void BSD::Write(Kernel::HLERequestContext& ctx) {
@@ -392,12 +385,11 @@ void BSD::Write(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service, "called. fd={} len={}", fd, ctx.GetReadBufferSize());
ExecuteWork(ctx, "BSD:Write", IsBlockingSocket(fd),
SendWork{
.fd = fd,
.flags = 0,
.message = ctx.ReadBuffer(),
});
ExecuteWork(ctx, SendWork{
.fd = fd,
.flags = 0,
.message = ctx.ReadBuffer(),
});
}
void BSD::Close(Kernel::HLERequestContext& ctx) {
@@ -410,24 +402,9 @@ void BSD::Close(Kernel::HLERequestContext& ctx) {
}
template <typename Work>
void BSD::ExecuteWork(Kernel::HLERequestContext& ctx, std::string_view sleep_reason,
bool is_blocking, Work work) {
if (!is_blocking) {
work.Execute(this);
work.Response(ctx);
return;
}
// Signal a dummy response to make IPC validation happy
// This will be overwritten by the SleepClientThread callback
void BSD::ExecuteWork(Kernel::HLERequestContext& ctx, Work work) {
work.Execute(this);
work.Response(ctx);
auto worker = worker_pool.CaptureWorker();
ctx.SleepClientThread(std::string(sleep_reason), std::numeric_limits<u64>::max(),
worker->Callback<Work>(), worker->KernelEvent());
worker->SendWork(std::move(work));
}
std::pair<s32, Errno> BSD::SocketImpl(Domain domain, Type type, Protocol protocol) {
@@ -807,18 +784,6 @@ bool BSD::IsFileDescriptorValid(s32 fd) const noexcept {
return true;
}
bool BSD::IsBlockingSocket(s32 fd) const noexcept {
// Report invalid sockets as non-blocking
// This way we avoid using a worker thread, as the call would fail without blocking the host
if (fd > static_cast<s32>(MAX_FD) || fd < 0) {
return false;
}
if (!file_descriptors[fd]) {
return false;
}
return (file_descriptors[fd]->flags & FLAG_O_NONBLOCK) != 0;
}
void BSD::BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) const noexcept {
IPC::ResponseBuilder rb{ctx, 4};
@@ -827,8 +792,7 @@ void BSD::BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) co
rb.PushEnum(bsd_errno);
}
BSD::BSD(Core::System& system_, const char* name)
: ServiceFramework{system_, name}, worker_pool{system_, this} {
BSD::BSD(Core::System& system_, const char* name) : ServiceFramework{system_, name} {
// clang-format off
static const FunctionInfo functions[] = {
{0, &BSD::RegisterClient, "RegisterClient"},

View File

@@ -11,7 +11,6 @@
#include "common/common_types.h"
#include "core/hle/kernel/hle_ipc.h"
#include "core/hle/service/service.h"
#include "core/hle/service/sockets/blocking_worker.h"
#include "core/hle/service/sockets/sockets.h"
namespace Core {
@@ -138,8 +137,7 @@ private:
void Close(Kernel::HLERequestContext& ctx);
template <typename Work>
void ExecuteWork(Kernel::HLERequestContext& ctx, std::string_view sleep_reason,
bool is_blocking, Work work);
void ExecuteWork(Kernel::HLERequestContext& ctx, Work work);
std::pair<s32, Errno> SocketImpl(Domain domain, Type type, Protocol protocol);
std::pair<s32, Errno> PollImpl(std::vector<u8>& write_buffer, std::vector<u8> read_buffer,
@@ -163,15 +161,10 @@ private:
s32 FindFreeFileDescriptorHandle() noexcept;
bool IsFileDescriptorValid(s32 fd) const noexcept;
bool IsBlockingSocket(s32 fd) const noexcept;
void BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) const noexcept;
std::array<std::optional<FileDescriptor>, MAX_FD> file_descriptors;
BlockingWorkerPool<BSD, PollWork, AcceptWork, ConnectWork, RecvWork, RecvFromWork, SendWork,
SendToWork>
worker_pool;
};
class BSDCFG final : public ServiceFramework<BSDCFG> {

View File

@@ -536,8 +536,7 @@ private:
LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
transaction, flags);
const auto guard = nv_flinger.Lock();
auto& buffer_queue = nv_flinger.FindBufferQueue(id);
auto& buffer_queue = *nv_flinger.FindBufferQueue(id);
switch (transaction) {
case TransactionId::Connect: {
@@ -547,6 +546,9 @@ private:
Settings::values.resolution_factor.GetValue()),
static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) *
Settings::values.resolution_factor.GetValue())};
buffer_queue.Connect();
ctx.WriteBuffer(response.Serialize());
break;
}
@@ -563,40 +565,25 @@ private:
IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
const u32 width{request.data.width};
const u32 height{request.data.height};
auto result = buffer_queue.DequeueBuffer(width, height);
if (result) {
// Buffer is available
IGBPDequeueBufferResponseParcel response{result->first, *result->second};
ctx.WriteBuffer(response.Serialize());
} else {
// Wait the current thread until a buffer becomes available
ctx.SleepClientThread(
"IHOSBinderDriver::DequeueBuffer", UINT64_MAX,
[=, this](std::shared_ptr<Kernel::Thread> thread,
Kernel::HLERequestContext& ctx, Kernel::ThreadWakeupReason reason) {
// Repeat TransactParcel DequeueBuffer when a buffer is available
const auto guard = nv_flinger.Lock();
auto& buffer_queue = nv_flinger.FindBufferQueue(id);
auto result = buffer_queue.DequeueBuffer(width, height);
ASSERT_MSG(result != std::nullopt, "Could not dequeue buffer.");
do {
if (auto result = buffer_queue.DequeueBuffer(width, height); result) {
// Buffer is available
IGBPDequeueBufferResponseParcel response{result->first, *result->second};
ctx.WriteBuffer(response.Serialize());
break;
}
} while (buffer_queue.IsConnected());
IGBPDequeueBufferResponseParcel response{result->first, *result->second};
ctx.WriteBuffer(response.Serialize());
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
},
buffer_queue.GetWritableBufferWaitEvent());
}
break;
}
case TransactionId::RequestBuffer: {
IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};
auto& buffer = buffer_queue.RequestBuffer(request.slot);
IGBPRequestBufferResponseParcel response{buffer};
ctx.WriteBuffer(response.Serialize());
break;
}
case TransactionId::QueueBuffer: {
@@ -682,7 +669,7 @@ private:
LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown);
const auto& buffer_queue = nv_flinger.FindBufferQueue(id);
const auto& buffer_queue = *nv_flinger.FindBufferQueue(id);
// TODO(Subv): Find out what this actually is.
IPC::ResponseBuilder rb{ctx, 2, 1};

View File

@@ -4,7 +4,6 @@
#include <algorithm>
#include <cstring>
#include <mutex>
#include <optional>
#include <utility>
@@ -45,44 +44,16 @@ struct Memory::Impl {
MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory);
}
void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
Common::MemoryHookPointer mmio_handler) {
UNIMPLEMENTED();
}
void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) {
ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, 0, Common::PageType::Unmapped);
}
void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
Common::MemoryHookPointer hook) {
UNIMPLEMENTED();
}
void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
Common::MemoryHookPointer hook) {
UNIMPLEMENTED();
}
bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) const {
const auto& page_table = process.PageTable().PageTableImpl();
const u8* const page_pointer = page_table.pointers[vaddr >> PAGE_BITS];
if (page_pointer != nullptr) {
return true;
}
if (page_table.attributes[vaddr >> PAGE_BITS] == Common::PageType::RasterizerCachedMemory) {
return true;
}
if (page_table.attributes[vaddr >> PAGE_BITS] != Common::PageType::Special) {
return false;
}
return false;
const auto [pointer, type] = page_table.pointers[vaddr >> PAGE_BITS].PointerType();
return pointer != nullptr || type == Common::PageType::RasterizerCachedMemory;
}
bool IsValidVirtualAddress(VAddr vaddr) const {
@@ -100,17 +71,15 @@ struct Memory::Impl {
}
u8* GetPointer(const VAddr vaddr) const {
u8* const page_pointer{current_page_table->pointers[vaddr >> PAGE_BITS]};
if (page_pointer) {
return page_pointer + vaddr;
const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
return pointer + vaddr;
}
if (current_page_table->attributes[vaddr >> PAGE_BITS] ==
Common::PageType::RasterizerCachedMemory) {
const auto type = Common::PageTable::PageInfo::ExtractType(raw_pointer);
if (type == Common::PageType::RasterizerCachedMemory) {
return GetPointerFromRasterizerCachedMemory(vaddr);
}
return {};
return nullptr;
}
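PointerType(), ExtractPointer(), and ExtractType() indicate that each page-table entry now packs the host pointer and the PageType into a single word so it can be read atomically. The exact bit layout lives in Common::PageTable and is not shown in this diff; the sketch below assumes one possible scheme (type tag in the low two bits of an aligned pointer) purely for illustration:

#include <atomic>
#include <cstdint>
#include <utility>

enum class PageType : std::uintptr_t { Unmapped = 0, Memory = 1, RasterizerCachedMemory = 2 };

struct PageEntry {
    // Assumes host pointers stored here are at least 4-byte aligned,
    // leaving the low two bits free for the type tag.
    void Store(std::uint8_t* pointer, PageType type) {
        raw.store(reinterpret_cast<std::uintptr_t>(pointer) |
                      static_cast<std::uintptr_t>(type),
                  std::memory_order_relaxed);
    }

    std::pair<std::uint8_t*, PageType> PointerType() const {
        const std::uintptr_t value = raw.load(std::memory_order_relaxed);
        return {reinterpret_cast<std::uint8_t*>(value & ~std::uintptr_t{3}),
                static_cast<PageType>(value & 3)};
    }

    std::atomic<std::uintptr_t> raw{0};
};

A single atomic load then yields both the pointer and the attribute that the old code fetched from two separate arrays (pointers and attributes), which is what lets the ReadBlock/WriteBlock paths below switch on the type without racing against concurrent remapping.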
u8 Read8(const VAddr addr) {
@@ -222,7 +191,8 @@ struct Memory::Impl {
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
switch (page_table.attributes[page_index]) {
const auto [pointer, type] = page_table.pointers[page_index].PointerType();
switch (type) {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
@@ -231,10 +201,8 @@ struct Memory::Impl {
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
const u8* const src_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
DEBUG_ASSERT(pointer);
const u8* const src_ptr = pointer + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_buffer, src_ptr, copy_amount);
break;
}
@@ -268,7 +236,8 @@ struct Memory::Impl {
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
switch (page_table.attributes[page_index]) {
const auto [pointer, type] = page_table.pointers[page_index].PointerType();
switch (type) {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
@@ -277,10 +246,8 @@ struct Memory::Impl {
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
const u8* const src_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
DEBUG_ASSERT(pointer);
const u8* const src_ptr = pointer + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_buffer, src_ptr, copy_amount);
break;
}
@@ -320,7 +287,8 @@ struct Memory::Impl {
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
switch (page_table.attributes[page_index]) {
const auto [pointer, type] = page_table.pointers[page_index].PointerType();
switch (type) {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
@@ -328,10 +296,8 @@ struct Memory::Impl {
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
u8* const dest_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
DEBUG_ASSERT(pointer);
u8* const dest_ptr = pointer + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_ptr, src_buffer, copy_amount);
break;
}
@@ -364,7 +330,8 @@ struct Memory::Impl {
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
switch (page_table.attributes[page_index]) {
const auto [pointer, type] = page_table.pointers[page_index].PointerType();
switch (type) {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
@@ -372,10 +339,8 @@ struct Memory::Impl {
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
u8* const dest_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
DEBUG_ASSERT(pointer);
u8* const dest_ptr = pointer + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_ptr, src_buffer, copy_amount);
break;
}
@@ -414,7 +379,8 @@ struct Memory::Impl {
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
switch (page_table.attributes[page_index]) {
const auto [pointer, type] = page_table.pointers[page_index].PointerType();
switch (type) {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
@@ -422,10 +388,8 @@ struct Memory::Impl {
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
u8* dest_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
DEBUG_ASSERT(pointer);
u8* const dest_ptr = pointer + page_offset + (page_index << PAGE_BITS);
std::memset(dest_ptr, 0, copy_amount);
break;
}
@@ -461,7 +425,8 @@ struct Memory::Impl {
std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
switch (page_table.attributes[page_index]) {
const auto [pointer, type] = page_table.pointers[page_index].PointerType();
switch (type) {
case Common::PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
@@ -470,9 +435,8 @@ struct Memory::Impl {
break;
}
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
const u8* src_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
DEBUG_ASSERT(pointer);
const u8* src_ptr = pointer + page_offset + (page_index << PAGE_BITS);
WriteBlock(process, dest_addr, src_ptr, copy_amount);
break;
}
@@ -498,34 +462,19 @@ struct Memory::Impl {
return CopyBlock(*system.CurrentProcess(), dest_addr, src_addr, size);
}
struct PageEntry {
u8* const pointer;
const Common::PageType attribute;
};
PageEntry SafePageEntry(std::size_t base) const {
std::lock_guard lock{rasterizer_cache_guard};
return {
.pointer = current_page_table->pointers[base],
.attribute = current_page_table->attributes[base],
};
}
void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
std::lock_guard lock{rasterizer_cache_guard};
if (vaddr == 0) {
return;
}
// Iterate over a contiguous CPU address space, which corresponds to the specified GPU
// address space, marking the region as un/cached. The region is marked un/cached at a
// granularity of CPU pages, hence why we iterate on a CPU page basis (note: GPU page size
// is different). This assumes the specified GPU address region is contiguous as well.
u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1;
for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) {
Common::PageType& page_type{current_page_table->attributes[vaddr >> PAGE_BITS]};
const u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1;
for (u64 i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) {
const Common::PageType page_type{
current_page_table->pointers[vaddr >> PAGE_BITS].Type()};
if (cached) {
// Switch page type to cached if now cached
switch (page_type) {
@@ -534,8 +483,8 @@ struct Memory::Impl {
// space, for example, a system module need not have a VRAM mapping.
break;
case Common::PageType::Memory:
page_type = Common::PageType::RasterizerCachedMemory;
current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr;
current_page_table->pointers[vaddr >> PAGE_BITS].Store(
nullptr, Common::PageType::RasterizerCachedMemory);
break;
case Common::PageType::RasterizerCachedMemory:
// There can be more than one GPU region mapped per CPU region, so it's common
@@ -556,16 +505,16 @@ struct Memory::Impl {
// that this area is already unmarked as cached.
break;
case Common::PageType::RasterizerCachedMemory: {
u8* pointer{GetPointerFromRasterizerCachedMemory(vaddr & ~PAGE_MASK)};
u8* const pointer{GetPointerFromRasterizerCachedMemory(vaddr & ~PAGE_MASK)};
if (pointer == nullptr) {
// It's possible that this function has been called while updating the
// pagetable after unmapping a VMA. In that case the underlying VMA will no
// longer exist, and we should just leave the pagetable entry blank.
page_type = Common::PageType::Unmapped;
current_page_table->pointers[vaddr >> PAGE_BITS].Store(
nullptr, Common::PageType::Unmapped);
} else {
current_page_table->pointers[vaddr >> PAGE_BITS] =
pointer - (vaddr & ~PAGE_MASK);
page_type = Common::PageType::Memory;
current_page_table->pointers[vaddr >> PAGE_BITS].Store(
pointer - (vaddr & ~PAGE_MASK), Common::PageType::Memory);
}
break;
}
@@ -595,7 +544,7 @@ struct Memory::Impl {
auto& gpu = system.GPU();
for (u64 i = 0; i < size; i++) {
const auto page = base + i;
if (page_table.attributes[page] == Common::PageType::RasterizerCachedMemory) {
if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) {
gpu.FlushAndInvalidateRegion(page << PAGE_BITS, PAGE_SIZE);
}
}
@@ -610,20 +559,18 @@ struct Memory::Impl {
"Mapping memory page without a pointer @ {:016x}", base * PAGE_SIZE);
while (base != end) {
page_table.attributes[base] = type;
page_table.pointers[base] = nullptr;
page_table.pointers[base].Store(nullptr, type);
page_table.backing_addr[base] = 0;
base += 1;
}
} else {
while (base != end) {
page_table.pointers[base] =
system.DeviceMemory().GetPointer(target) - (base << PAGE_BITS);
page_table.attributes[base] = type;
page_table.pointers[base].Store(
system.DeviceMemory().GetPointer(target) - (base << PAGE_BITS), type);
page_table.backing_addr[base] = target - (base << PAGE_BITS);
ASSERT_MSG(page_table.pointers[base],
ASSERT_MSG(page_table.pointers[base].Pointer(),
"memory mapping base yield a nullptr within the table");
base += 1;
@@ -646,21 +593,13 @@ struct Memory::Impl {
template <typename T>
T Read(const VAddr vaddr) {
// Avoid adding any extra logic to this fast-path block
if (const u8* const pointer = current_page_table->pointers[vaddr >> PAGE_BITS]) {
const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
if (const u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
T value;
std::memcpy(&value, &pointer[vaddr], sizeof(T));
return value;
}
// Otherwise, we need to grab the page with a lock, in case it is currently being modified
const auto entry = SafePageEntry(vaddr >> PAGE_BITS);
if (entry.pointer) {
T value;
std::memcpy(&value, &entry.pointer[vaddr], sizeof(T));
return value;
}
switch (entry.attribute) {
switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
case Common::PageType::Unmapped:
LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr);
return 0;
@@ -692,20 +631,12 @@ struct Memory::Impl {
template <typename T>
void Write(const VAddr vaddr, const T data) {
// Avoid adding any extra logic to this fast-path block
if (u8* const pointer = current_page_table->pointers[vaddr >> PAGE_BITS]) {
const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
std::memcpy(&pointer[vaddr], &data, sizeof(T));
return;
}
// Otherwise, we need to grab the page with a lock, in case it is currently being modified
const auto entry = SafePageEntry(vaddr >> PAGE_BITS);
if (entry.pointer) {
// Memory was mapped, we are done
std::memcpy(&entry.pointer[vaddr], &data, sizeof(T));
return;
}
switch (entry.attribute) {
switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
case Common::PageType::Unmapped:
LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
static_cast<u32>(data), vaddr);
@@ -726,15 +657,13 @@ struct Memory::Impl {
template <typename T>
bool WriteExclusive(const VAddr vaddr, const T data, const T expected) {
u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
if (page_pointer != nullptr) {
const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
// NOTE: Avoid adding any extra logic to this fast-path block
auto* pointer = reinterpret_cast<volatile T*>(&page_pointer[vaddr]);
return Common::AtomicCompareAndSwap(pointer, data, expected);
const auto volatile_pointer = reinterpret_cast<volatile T*>(&pointer[vaddr]);
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
}
const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
switch (type) {
switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
case Common::PageType::Unmapped:
LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
static_cast<u32>(data), vaddr);
@@ -755,15 +684,13 @@ struct Memory::Impl {
}
bool WriteExclusive128(const VAddr vaddr, const u128 data, const u128 expected) {
u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
if (page_pointer != nullptr) {
const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
// NOTE: Avoid adding any extra logic to this fast-path block
auto* pointer = reinterpret_cast<volatile u64*>(&page_pointer[vaddr]);
return Common::AtomicCompareAndSwap(pointer, data, expected);
const auto volatile_pointer = reinterpret_cast<volatile u64*>(&pointer[vaddr]);
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
}
const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
switch (type) {
switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
case Common::PageType::Unmapped:
LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}{:016X}", sizeof(data) * 8,
static_cast<u64>(data[1]), static_cast<u64>(data[0]), vaddr);
@@ -783,7 +710,6 @@ struct Memory::Impl {
return true;
}
mutable std::mutex rasterizer_cache_guard;
Common::PageTable* current_page_table = nullptr;
Core::System& system;
};
@@ -799,25 +725,10 @@ void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size
impl->MapMemoryRegion(page_table, base, size, target);
}
void Memory::MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
Common::MemoryHookPointer mmio_handler) {
impl->MapIoRegion(page_table, base, size, std::move(mmio_handler));
}
void Memory::UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) {
impl->UnmapRegion(page_table, base, size);
}
void Memory::AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
Common::MemoryHookPointer hook) {
impl->AddDebugHook(page_table, base, size, std::move(hook));
}
void Memory::RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
Common::MemoryHookPointer hook) {
impl->RemoveDebugHook(page_table, base, size, std::move(hook));
}
bool Memory::IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) const {
return impl->IsValidVirtualAddress(process, vaddr);
}
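
The memory.cpp hunks above fold the separate pointers/attributes arrays into a single page entry that can be read and written atomically, which is what lets SafePageEntry and the rasterizer_cache_guard mutex disappear. The sketch below shows the idea in isolation, assuming the page type fits in the two low bits of a suitably aligned host pointer; the real encoding is encapsulated by Common::PageTable::PageInfo and may differ.

#include <atomic>
#include <cstdint>

enum class PageType : std::uintptr_t { Unmapped = 0, Memory = 1, RasterizerCachedMemory = 2 };

// One atomic word per page: pointer and type are packed together so a reader
// never observes a pointer from one update paired with a type from another.
class PageEntry {
public:
    void Store(std::uint8_t* pointer, PageType type) {
        raw.store(reinterpret_cast<std::uintptr_t>(pointer) | static_cast<std::uintptr_t>(type),
                  std::memory_order_relaxed);
    }

    std::uint8_t* Pointer() const {
        return ExtractPointer(Raw());
    }

    PageType Type() const {
        return ExtractType(Raw());
    }

    std::uintptr_t Raw() const {
        return raw.load(std::memory_order_relaxed);
    }

    static std::uint8_t* ExtractPointer(std::uintptr_t value) {
        return reinterpret_cast<std::uint8_t*>(value & ~std::uintptr_t{3});
    }

    static PageType ExtractType(std::uintptr_t value) {
        return static_cast<PageType>(value & std::uintptr_t{3});
    }

private:
    std::atomic<std::uintptr_t> raw{0};
};

int main() {
    alignas(4) static std::uint8_t backing[0x1000];
    PageEntry entry;
    entry.Store(backing, PageType::Memory);
    const std::uintptr_t snapshot = entry.Raw(); // single load, as in the Read/Write fast paths
    return PageEntry::ExtractPointer(snapshot) == backing &&
                   PageEntry::ExtractType(snapshot) == PageType::Memory
               ? 0
               : 1;
}

The Read/Write fast paths in the diff follow the same shape: one Raw() load per access, ExtractPointer for the hot path, and ExtractType only when the pointer turns out to be null.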

View File

@@ -8,7 +8,6 @@
#include <memory>
#include <string>
#include "common/common_types.h"
#include "common/memory_hook.h"
namespace Common {
struct PageTable;
@@ -77,17 +76,6 @@ public:
*/
void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target);
/**
* Maps a region of the emulated process address space as a IO region.
*
* @param page_table The page table of the emulated process.
* @param base The address to start mapping at. Must be page-aligned.
* @param size The amount of bytes to map. Must be page-aligned.
* @param mmio_handler The handler that backs the mapping.
*/
void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
Common::MemoryHookPointer mmio_handler);
/**
* Unmaps a region of the emulated process address space.
*
@@ -97,28 +85,6 @@ public:
*/
void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size);
/**
* Adds a memory hook to intercept reads and writes to given region of memory.
*
* @param page_table The page table of the emulated process
* @param base The starting address to apply the hook to.
* @param size The size of the memory region to apply the hook to, in bytes.
* @param hook The hook to apply to the region of memory.
*/
void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
Common::MemoryHookPointer hook);
/**
* Removes a memory hook from a given range of memory.
*
* @param page_table The page table of the emulated process.
* @param base The starting address to remove the hook from.
* @param size The size of the memory region to remove the hook from, in bytes.
* @param hook The hook to remove from the specified region of memory.
*/
void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
Common::MemoryHookPointer hook);
/**
* Checks whether or not the supplied address is a valid virtual
* address for the given process.

View File

@@ -148,9 +148,4 @@ void RestoreGlobalState(bool is_powered_on) {
values.motion_enabled.SetGlobal(true);
}
void Sanitize() {
values.use_asynchronous_gpu_emulation.SetValue(
values.use_asynchronous_gpu_emulation.GetValue() || values.use_multi_core.GetValue());
}
} // namespace Settings

View File

@@ -131,6 +131,7 @@ struct Values {
bool cpuopt_unsafe_unfuse_fma;
bool cpuopt_unsafe_reduce_fp_error;
bool cpuopt_unsafe_inaccurate_nan;
// Renderer
Setting<RendererBackend> renderer_backend;
@@ -221,7 +222,7 @@ struct Values {
bool disable_macro_jit;
bool extended_logging;
// Misceallaneous
// Miscellaneous
std::string log_filter;
bool use_dev_keys;
@@ -257,7 +258,4 @@ void LogSettings();
// Restore the global state of all applicable settings in the Values struct
void RestoreGlobalState(bool is_powered_on);
// Fixes settings that are known to cause issues with the emulator
void Sanitize();
} // namespace Settings
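
settings.h gains cpuopt_unsafe_inaccurate_nan next to the other unsafe CPU options; such booleans are typically folded into a bitmask handed to the CPU JIT when it is configured. The sketch below illustrates that wiring with placeholder names only; it is not the JIT's real API.

#include <cstdint>

struct Values {
    bool cpuopt_unsafe_unfuse_fma = false;
    bool cpuopt_unsafe_reduce_fp_error = false;
    bool cpuopt_unsafe_inaccurate_nan = false;
};

enum class OptimizationFlag : std::uint32_t {
    UnfuseFMA = 1u << 0,
    ReduceFPError = 1u << 1,
    InaccurateNaN = 1u << 2,
};

// Translate the per-title booleans into the mask the JIT would consume.
constexpr std::uint32_t BuildOptimizationMask(const Values& values) {
    std::uint32_t mask = 0;
    if (values.cpuopt_unsafe_unfuse_fma) {
        mask |= static_cast<std::uint32_t>(OptimizationFlag::UnfuseFMA);
    }
    if (values.cpuopt_unsafe_reduce_fp_error) {
        mask |= static_cast<std::uint32_t>(OptimizationFlag::ReduceFPError);
    }
    if (values.cpuopt_unsafe_inaccurate_nan) {
        mask |= static_cast<std::uint32_t>(OptimizationFlag::InaccurateNaN);
    }
    return mask;
}

int main() {
    Values values;
    values.cpuopt_unsafe_inaccurate_nan = true;
    return BuildOptimizationMask(values) == 4 ? 0 : 1;
}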

View File

@@ -120,17 +120,17 @@ private:
/// For use in initialization, querying devices to find the adapter
void Setup();
/// Resets status of all GC controller devices to a disconected state
/// Resets status of all GC controller devices to a disconnected state
void ResetDevices();
/// Resets status of device connected to a disconected state
/// Resets status of device connected to a disconnected state
void ResetDevice(std::size_t port);
/// Returns true if we successfully gain access to GC Adapter
bool CheckDeviceAccess();
/// Captures GC Adapter endpoint address
/// Returns true if the endpoind was set correctly
/// Returns true if the endpoint was set correctly
bool GetGCEndpoint(libusb_device* device);
/// For shutting down, clear all data, join all threads, release usb

View File

@@ -139,10 +139,10 @@ void GCButtonFactory::EndConfiguration() {
class GCAnalog final : public Input::AnalogDevice {
public:
explicit GCAnalog(u32 port_, u32 axis_x_, u32 axis_y_, float deadzone_,
const GCAdapter::Adapter* adapter, float range_)
: port(port_), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_), gcadapter(adapter),
range(range_) {}
explicit GCAnalog(u32 port_, u32 axis_x_, u32 axis_y_, bool invert_x_, bool invert_y_,
float deadzone_, float range_, const GCAdapter::Adapter* adapter)
: port(port_), axis_x(axis_x_), axis_y(axis_y_), invert_x(invert_x_), invert_y(invert_y_),
deadzone(deadzone_), range(range_), gcadapter(adapter) {}
float GetAxis(u32 axis) const {
if (gcadapter->DeviceConnected(port)) {
@@ -157,7 +157,12 @@ public:
std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const {
float x = GetAxis(analog_axis_x);
float y = GetAxis(analog_axis_y);
if (invert_x) {
x = -x;
}
if (invert_y) {
y = -y;
}
// Make sure the coordinates are in the unit circle,
// otherwise normalize it.
float r = x * x + y * y;
@@ -200,9 +205,11 @@ private:
const u32 port;
const u32 axis_x;
const u32 axis_y;
const bool invert_x;
const bool invert_y;
const float deadzone;
const GCAdapter::Adapter* gcadapter;
const float range;
const GCAdapter::Adapter* gcadapter;
mutable std::mutex mutex;
};
@@ -223,8 +230,13 @@ std::unique_ptr<Input::AnalogDevice> GCAnalogFactory::Create(const Common::Param
const auto axis_y = static_cast<u32>(params.Get("axis_y", 1));
const auto deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f);
const auto range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f);
const std::string invert_x_value = params.Get("invert_x", "+");
const std::string invert_y_value = params.Get("invert_y", "+");
const bool invert_x = invert_x_value == "-";
const bool invert_y = invert_y_value == "-";
return std::make_unique<GCAnalog>(port, axis_x, axis_y, deadzone, adapter.get(), range);
return std::make_unique<GCAnalog>(port, axis_x, axis_y, invert_x, invert_y, deadzone, range,
adapter.get());
}
void GCAnalogFactory::BeginConfiguration() {
@@ -282,6 +294,8 @@ Common::ParamPackage GCAnalogFactory::GetNextInput() {
params.Set("port", controller_number);
params.Set("axis_x", analog_x_axis);
params.Set("axis_y", analog_y_axis);
params.Set("invert_x", "+");
params.Set("invert_y", "+");
analog_x_axis = -1;
analog_y_axis = -1;
controller_number = -1;
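
The gc_poller change above (mirrored for the mouse and SDL backends further down) threads two new parameters, invert_x and invert_y, through the analog factories: they are stored as "+" or "-" in the param package and applied as sign flips when the axes are sampled. A standalone sketch of that plumbing, with a plain map standing in for Common::ParamPackage:

#include <map>
#include <string>
#include <utility>

using ParamMap = std::map<std::string, std::string>;

struct AnalogStick {
    bool invert_x;
    bool invert_y;

    // Negate the sampled axes according to the inversion flags.
    std::pair<float, float> Apply(float x, float y) const {
        if (invert_x) {
            x = -x;
        }
        if (invert_y) {
            y = -y;
        }
        return {x, y};
    }
};

// "-" means inverted, anything else (including a missing key) means normal.
AnalogStick MakeStick(const ParamMap& params) {
    const auto get = [&params](const std::string& key) {
        const auto it = params.find(key);
        return it != params.end() ? it->second : std::string{"+"};
    };
    return AnalogStick{get("invert_x") == "-", get("invert_y") == "-"};
}

int main() {
    const AnalogStick stick = MakeStick({{"invert_x", "-"}, {"invert_y", "+"}});
    const auto [x, y] = stick.Apply(0.5f, 0.25f); // x is flipped to -0.5
    return (x < 0.0f && y > 0.0f) ? 0 : 1;
}

GetNextInput and BuildParamPackageForAnalog then default both keys to "+", so freshly generated mappings keep their original orientation.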

View File

@@ -129,7 +129,7 @@ void MotionInput::UpdateOrientation(u64 elapsed_time) {
rad_gyro += ki * integral_error;
rad_gyro += kd * derivative_error;
} else {
// Give more weight to acelerometer values to compensate for the lack of gyro
// Give more weight to accelerometer values to compensate for the lack of gyro
rad_gyro += 35.0f * kp * real_error;
rad_gyro += 10.0f * ki * integral_error;
rad_gyro += 10.0f * kd * derivative_error;

View File

@@ -20,7 +20,7 @@ enum class MouseButton {
Left,
Wheel,
Right,
Foward,
Forward,
Backward,
Undefined,
};

View File

@@ -62,10 +62,10 @@ void MouseButtonFactory::EndConfiguration() {
class MouseAnalog final : public Input::AnalogDevice {
public:
explicit MouseAnalog(u32 port_, u32 axis_x_, u32 axis_y_, float deadzone_, float range_,
const MouseInput::Mouse* mouse_input_)
: button(port_), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_), range(range_),
mouse_input(mouse_input_) {}
explicit MouseAnalog(u32 port_, u32 axis_x_, u32 axis_y_, bool invert_x_, bool invert_y_,
float deadzone_, float range_, const MouseInput::Mouse* mouse_input_)
: button(port_), axis_x(axis_x_), axis_y(axis_y_), invert_x(invert_x_), invert_y(invert_y_),
deadzone(deadzone_), range(range_), mouse_input(mouse_input_) {}
float GetAxis(u32 axis) const {
std::lock_guard lock{mutex};
@@ -77,6 +77,12 @@ public:
std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const {
float x = GetAxis(analog_axis_x);
float y = GetAxis(analog_axis_y);
if (invert_x) {
x = -x;
}
if (invert_y) {
y = -y;
}
// Make sure the coordinates are in the unit circle,
// otherwise normalize it.
@@ -104,6 +110,8 @@ private:
const u32 button;
const u32 axis_x;
const u32 axis_y;
const bool invert_x;
const bool invert_y;
const float deadzone;
const float range;
const MouseInput::Mouse* mouse_input;
@@ -128,8 +136,13 @@ std::unique_ptr<Input::AnalogDevice> MouseAnalogFactory::Create(
const auto axis_y = static_cast<u32>(params.Get("axis_y", 1));
const auto deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f);
const auto range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f);
const std::string invert_x_value = params.Get("invert_x", "+");
const std::string invert_y_value = params.Get("invert_y", "+");
const bool invert_x = invert_x_value == "-";
const bool invert_y = invert_y_value == "-";
return std::make_unique<MouseAnalog>(port, axis_x, axis_y, deadzone, range, mouse_input.get());
return std::make_unique<MouseAnalog>(port, axis_x, axis_y, invert_x, invert_y, deadzone, range,
mouse_input.get());
}
void MouseAnalogFactory::BeginConfiguration() {
@@ -153,6 +166,8 @@ Common::ParamPackage MouseAnalogFactory::GetNextInput() const {
params.Set("port", static_cast<u16>(pad.button));
params.Set("axis_x", 0);
params.Set("axis_y", 1);
params.Set("invert_x", "+");
params.Set("invert_y", "+");
return params;
}
}

View File

@@ -352,13 +352,20 @@ private:
class SDLAnalog final : public Input::AnalogDevice {
public:
explicit SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_,
float deadzone_, float range_)
: joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_),
range(range_) {}
bool invert_x_, bool invert_y_, float deadzone_, float range_)
: joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_), invert_x(invert_x_),
invert_y(invert_y_), deadzone(deadzone_), range(range_) {}
std::tuple<float, float> GetStatus() const override {
const auto [x, y] = joystick->GetAnalog(axis_x, axis_y, range);
auto [x, y] = joystick->GetAnalog(axis_x, axis_y, range);
const float r = std::sqrt((x * x) + (y * y));
if (invert_x) {
x = -x;
}
if (invert_y) {
y = -y;
}
if (r > deadzone) {
return std::make_tuple(x / r * (r - deadzone) / (1 - deadzone),
y / r * (r - deadzone) / (1 - deadzone));
@@ -386,6 +393,8 @@ private:
std::shared_ptr<SDLJoystick> joystick;
const int axis_x;
const int axis_y;
const bool invert_x;
const bool invert_y;
const float deadzone;
const float range;
};
@@ -572,12 +581,17 @@ public:
const int axis_y = params.Get("axis_y", 1);
const float deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f);
const float range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f);
const std::string invert_x_value = params.Get("invert_x", "+");
const std::string invert_y_value = params.Get("invert_y", "+");
const bool invert_x = invert_x_value == "-";
const bool invert_y = invert_y_value == "-";
auto joystick = state.GetSDLJoystickByGUID(guid, port);
// This is necessary so accessing GetAxis with axis_x and axis_y won't crash
joystick->SetAxis(axis_x, 0);
joystick->SetAxis(axis_y, 0);
return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y, deadzone, range);
return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y, invert_x, invert_y, deadzone,
range);
}
private:
@@ -886,6 +900,8 @@ Common::ParamPackage BuildParamPackageForAnalog(int port, const std::string& gui
params.Set("guid", guid);
params.Set("axis_x", axis_x);
params.Set("axis_y", axis_y);
params.Set("invert_x", "+");
params.Set("invert_y", "+");
return params;
}
} // Anonymous namespace
@@ -1014,11 +1030,44 @@ public:
}
return {};
}
[[nodiscard]] std::optional<Common::ParamPackage> FromEvent(const SDL_Event& event) const {
[[nodiscard]] std::optional<Common::ParamPackage> FromEvent(SDL_Event& event) {
switch (event.type) {
case SDL_JOYAXISMOTION:
if (std::abs(event.jaxis.value / 32767.0) < 0.5) {
if (!axis_memory.count(event.jaxis.which) ||
!axis_memory[event.jaxis.which].count(event.jaxis.axis)) {
axis_memory[event.jaxis.which][event.jaxis.axis] = event.jaxis.value;
axis_event_count[event.jaxis.which][event.jaxis.axis] = 1;
break;
} else {
axis_event_count[event.jaxis.which][event.jaxis.axis]++;
// The joystick and axis exist in our map if we take this branch, so no checks
// needed
if (std::abs(
(event.jaxis.value - axis_memory[event.jaxis.which][event.jaxis.axis]) /
32767.0) < 0.5) {
break;
} else {
if (axis_event_count[event.jaxis.which][event.jaxis.axis] == 2 &&
IsAxisAtPole(event.jaxis.value) &&
IsAxisAtPole(axis_memory[event.jaxis.which][event.jaxis.axis])) {
// If we have exactly two events and both are near a pole, this is
// likely a digital input masquerading as an analog axis; Instead of
// trying to look at the direction the axis travelled, assume the first
// event was press and the second was release; This should handle most
// digital axes while deferring to the direction of travel for analog
// axes
event.jaxis.value = static_cast<Sint16>(
std::copysign(32767, axis_memory[event.jaxis.which][event.jaxis.axis]));
} else {
// There are more than two events, so this is likely a true analog axis,
// check the direction it travelled
event.jaxis.value = static_cast<Sint16>(std::copysign(
32767,
event.jaxis.value - axis_memory[event.jaxis.which][event.jaxis.axis]));
}
axis_memory.clear();
axis_event_count.clear();
}
}
[[fallthrough]];
case SDL_JOYBUTTONUP:
@@ -1027,6 +1076,16 @@ public:
}
return std::nullopt;
}
private:
// Determine whether an axis value is close to an extreme or center
// Some controllers have a digital D-Pad as a pair of analog sticks, with 3 possible values per
// axis, which is why the center must be considered a pole
bool IsAxisAtPole(int16_t value) const {
return std::abs(value) >= 32767 || std::abs(value) < 327;
}
std::unordered_map<SDL_JoystickID, std::unordered_map<uint8_t, int16_t>> axis_memory;
std::unordered_map<SDL_JoystickID, std::unordered_map<uint8_t, uint32_t>> axis_event_count;
};
class SDLMotionPoller final : public SDLPoller {
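
The FromEvent rework above decides an axis direction from how the value travels rather than from a single sample: the first event per axis is remembered, and once the value has moved more than half of the range the poller either treats a two-event, pole-to-pole pair as a digital press/release or follows the direction of travel for a genuine analog stick. The standalone sketch below reproduces that heuristic, keyed on a bare axis index instead of SDL joystick ids.

#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <optional>
#include <unordered_map>

// Values near an extreme or near the center count as "poles"; some D-Pads are
// exposed as analog axes with only three possible values.
bool IsAxisAtPole(std::int16_t value) {
    return std::abs(value) >= 32767 || std::abs(value) < 327;
}

class AxisDirectionDetector {
public:
    // Returns the resolved direction (+32767 or -32767) once it can be decided.
    std::optional<std::int16_t> Feed(std::uint8_t axis, std::int16_t value) {
        const auto it = memory.find(axis);
        if (it == memory.end()) {
            memory[axis] = value;
            event_count[axis] = 1;
            return std::nullopt;
        }
        ++event_count[axis];
        if (std::abs((value - it->second) / 32767.0) < 0.5) {
            return std::nullopt; // Not enough travel yet.
        }
        std::int16_t resolved;
        if (event_count[axis] == 2 && IsAxisAtPole(value) && IsAxisAtPole(it->second)) {
            // Likely a digital input: first event was the press, second the release.
            resolved = static_cast<std::int16_t>(std::copysign(32767, it->second));
        } else {
            // Likely a real analog axis: follow the direction of travel.
            resolved = static_cast<std::int16_t>(std::copysign(32767, value - it->second));
        }
        memory.clear();
        event_count.clear();
        return resolved;
    }

private:
    std::unordered_map<std::uint8_t, std::int16_t> memory;
    std::unordered_map<std::uint8_t, std::uint32_t> event_count;
};

int main() {
    AxisDirectionDetector detector;
    detector.Feed(0, 0);                      // first sample, axis at rest
    const auto dir = detector.Feed(0, 30000); // travelled past half the range
    return (dir && *dir > 0) ? 0 : 1;
}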

View File

@@ -225,6 +225,11 @@ void Client::OnPortInfo([[maybe_unused]] Response::PortInfo data) {
}
void Client::OnPadData(Response::PadData data, std::size_t client) {
// Accept packets only for the correct pad
if (static_cast<u8>(clients[client].pad_index) != data.info.id) {
return;
}
LOG_TRACE(Input, "PadData packet received");
if (data.packet_counter == clients[client].packet_sequence) {
LOG_WARNING(

View File

@@ -28,14 +28,14 @@ private:
mutable std::mutex mutex;
};
/// A motion device factory that creates motion devices from JC Adapter
/// A motion device factory that creates motion devices from a UDP client
UDPMotionFactory::UDPMotionFactory(std::shared_ptr<CemuhookUDP::Client> client_)
: client(std::move(client_)) {}
/**
* Creates motion device
* @param params contains parameters for creating the device:
* - "port": the nth jcpad on the adapter
* - "port": the UDP port number
*/
std::unique_ptr<Input::MotionDevice> UDPMotionFactory::Create(const Common::ParamPackage& params) {
auto ip = params.Get("ip", "127.0.0.1");
@@ -90,14 +90,14 @@ private:
mutable std::mutex mutex;
};
/// A motion device factory that creates motion devices from JC Adapter
/// A motion device factory that creates motion devices from a UDP client
UDPTouchFactory::UDPTouchFactory(std::shared_ptr<CemuhookUDP::Client> client_)
: client(std::move(client_)) {}
/**
* Creates motion device
* @param params contains parameters for creating the device:
* - "port": the nth jcpad on the adapter
* - "port": the UDP port number
*/
std::unique_ptr<Input::TouchDevice> UDPTouchFactory::Create(const Common::ParamPackage& params) {
auto ip = params.Get("ip", "127.0.0.1");

View File

@@ -4,8 +4,6 @@ add_executable(tests
common/fibers.cpp
common/param_package.cpp
common/ring_buffer.cpp
core/arm/arm_test_common.cpp
core/arm/arm_test_common.h
core/core_timing.cpp
tests.cpp
)

View File

@@ -207,7 +207,7 @@ static void ThreadStart2_2(u32 id, TestControl2& test_control) {
}
/** This test checks for fiber thread exchange configuration and validates that fibers are
* that a fiber has been succesfully transfered from one thread to another and that the TLS
* that a fiber has been successfully transferred from one thread to another and that the TLS
* region of the thread is kept while changing fibers.
*/
TEST_CASE("Fibers::InterExchange", "[common]") {
@@ -299,7 +299,7 @@ static void ThreadStart3(u32 id, TestControl3& test_control) {
}
/** This test checks for two threads racing to start the same fiber.
* It checks execution occured in an ordered manner and by no time there were
* It checks execution occurred in an ordered manner and by no time there were
* two contexts at the same time.
*/
TEST_CASE("Fibers::StartRace", "[common]") {

View File

@@ -1,145 +0,0 @@
// Copyright 2016 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include "common/page_table.h"
#include "core/core.h"
#include "core/hle/kernel/memory/page_table.h"
#include "core/hle/kernel/process.h"
#include "core/memory.h"
#include "tests/core/arm/arm_test_common.h"
namespace ArmTests {
TestEnvironment::TestEnvironment(bool mutable_memory_)
: mutable_memory(mutable_memory_),
test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} {
auto& system = Core::System::GetInstance();
auto process = Kernel::Process::Create(system, "", Kernel::Process::ProcessType::Userland);
page_table = &process->PageTable().PageTableImpl();
system.Memory().MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory);
system.Memory().MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory);
kernel.MakeCurrentProcess(process.get());
}
TestEnvironment::~TestEnvironment() {
auto& system = Core::System::GetInstance();
system.Memory().UnmapRegion(*page_table, 0x80000000, 0x80000000);
system.Memory().UnmapRegion(*page_table, 0x00000000, 0x80000000);
}
void TestEnvironment::SetMemory64(VAddr vaddr, u64 value) {
SetMemory32(vaddr + 0, static_cast<u32>(value));
SetMemory32(vaddr + 4, static_cast<u32>(value >> 32));
}
void TestEnvironment::SetMemory32(VAddr vaddr, u32 value) {
SetMemory16(vaddr + 0, static_cast<u16>(value));
SetMemory16(vaddr + 2, static_cast<u16>(value >> 16));
}
void TestEnvironment::SetMemory16(VAddr vaddr, u16 value) {
SetMemory8(vaddr + 0, static_cast<u8>(value));
SetMemory8(vaddr + 1, static_cast<u8>(value >> 8));
}
void TestEnvironment::SetMemory8(VAddr vaddr, u8 value) {
test_memory->data[vaddr] = value;
}
std::vector<WriteRecord> TestEnvironment::GetWriteRecords() const {
return write_records;
}
void TestEnvironment::ClearWriteRecords() {
write_records.clear();
}
TestEnvironment::TestMemory::~TestMemory() {}
std::optional<bool> TestEnvironment::TestMemory::IsValidAddress(VAddr addr) {
return true;
}
std::optional<u8> TestEnvironment::TestMemory::Read8(VAddr addr) {
const auto iter = data.find(addr);
if (iter == data.end()) {
// Some arbitrary data
return static_cast<u8>(addr);
}
return iter->second;
}
std::optional<u16> TestEnvironment::TestMemory::Read16(VAddr addr) {
return *Read8(addr) | static_cast<u16>(*Read8(addr + 1)) << 8;
}
std::optional<u32> TestEnvironment::TestMemory::Read32(VAddr addr) {
return *Read16(addr) | static_cast<u32>(*Read16(addr + 2)) << 16;
}
std::optional<u64> TestEnvironment::TestMemory::Read64(VAddr addr) {
return *Read32(addr) | static_cast<u64>(*Read32(addr + 4)) << 32;
}
bool TestEnvironment::TestMemory::ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size) {
VAddr addr = src_addr;
u8* data = static_cast<u8*>(dest_buffer);
for (std::size_t i = 0; i < size; i++, addr++, data++) {
*data = *Read8(addr);
}
return true;
}
bool TestEnvironment::TestMemory::Write8(VAddr addr, u8 data) {
env->write_records.emplace_back(8, addr, data);
if (env->mutable_memory)
env->SetMemory8(addr, data);
return true;
}
bool TestEnvironment::TestMemory::Write16(VAddr addr, u16 data) {
env->write_records.emplace_back(16, addr, data);
if (env->mutable_memory)
env->SetMemory16(addr, data);
return true;
}
bool TestEnvironment::TestMemory::Write32(VAddr addr, u32 data) {
env->write_records.emplace_back(32, addr, data);
if (env->mutable_memory)
env->SetMemory32(addr, data);
return true;
}
bool TestEnvironment::TestMemory::Write64(VAddr addr, u64 data) {
env->write_records.emplace_back(64, addr, data);
if (env->mutable_memory)
env->SetMemory64(addr, data);
return true;
}
bool TestEnvironment::TestMemory::WriteBlock(VAddr dest_addr, const void* src_buffer,
std::size_t size) {
VAddr addr = dest_addr;
const u8* data = static_cast<const u8*>(src_buffer);
for (std::size_t i = 0; i < size; i++, addr++, data++) {
env->write_records.emplace_back(8, addr, *data);
if (env->mutable_memory)
env->SetMemory8(addr, *data);
}
return true;
}
} // namespace ArmTests

View File

@@ -1,93 +0,0 @@
// Copyright 2016 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <tuple>
#include <unordered_map>
#include <vector>
#include "common/common_types.h"
#include "common/memory_hook.h"
#include "core/hle/kernel/kernel.h"
namespace Common {
struct PageTable;
}
namespace ArmTests {
struct WriteRecord {
WriteRecord(std::size_t size, VAddr addr, u64 data) : size(size), addr(addr), data(data) {}
std::size_t size;
VAddr addr;
u64 data;
bool operator==(const WriteRecord& o) const {
return std::tie(size, addr, data) == std::tie(o.size, o.addr, o.data);
}
};
class TestEnvironment final {
public:
/*
* Inititalise test environment
* @param mutable_memory If false, writes to memory can never be read back.
* (Memory is immutable.)
*/
explicit TestEnvironment(bool mutable_memory = false);
/// Shutdown test environment
~TestEnvironment();
/// Sets value at memory location vaddr.
void SetMemory8(VAddr vaddr, u8 value);
void SetMemory16(VAddr vaddr, u16 value);
void SetMemory32(VAddr vaddr, u32 value);
void SetMemory64(VAddr vaddr, u64 value);
/**
* Whenever Memory::Write{8,16,32,64} is called within the test environment,
* a new write-record is made.
* @returns A vector of write records made since they were last cleared.
*/
std::vector<WriteRecord> GetWriteRecords() const;
/// Empties the internal write-record store.
void ClearWriteRecords();
private:
friend struct TestMemory;
struct TestMemory final : Common::MemoryHook {
explicit TestMemory(TestEnvironment* env_) : env(env_) {}
TestEnvironment* env;
~TestMemory() override;
std::optional<bool> IsValidAddress(VAddr addr) override;
std::optional<u8> Read8(VAddr addr) override;
std::optional<u16> Read16(VAddr addr) override;
std::optional<u32> Read32(VAddr addr) override;
std::optional<u64> Read64(VAddr addr) override;
bool ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size) override;
bool Write8(VAddr addr, u8 data) override;
bool Write16(VAddr addr, u16 data) override;
bool Write32(VAddr addr, u32 data) override;
bool Write64(VAddr addr, u64 data) override;
bool WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size) override;
std::unordered_map<VAddr, u8> data;
};
bool mutable_memory;
std::shared_ptr<TestMemory> test_memory;
std::vector<WriteRecord> write_records;
Common::PageTable* page_table = nullptr;
Kernel::KernelCore kernel;
};
} // namespace ArmTests

View File

@@ -25,6 +25,7 @@ add_library(video_core STATIC
command_classes/vic.h
compatible_formats.cpp
compatible_formats.h
delayed_destruction_ring.h
dirty_flags.cpp
dirty_flags.h
dma_pusher.cpp
@@ -47,6 +48,7 @@ add_library(video_core STATIC
engines/shader_bytecode.h
engines/shader_header.h
engines/shader_type.h
framebuffer_config.h
macro/macro.cpp
macro/macro.h
macro/macro_hle.cpp
@@ -58,10 +60,6 @@ add_library(video_core STATIC
fence_manager.h
gpu.cpp
gpu.h
gpu_asynch.cpp
gpu_asynch.h
gpu_synch.cpp
gpu_synch.h
gpu_thread.cpp
gpu_thread.h
guest_driver.cpp
@@ -84,14 +82,10 @@ add_library(video_core STATIC
renderer_opengl/gl_device.h
renderer_opengl/gl_fence_manager.cpp
renderer_opengl/gl_fence_manager.h
renderer_opengl/gl_framebuffer_cache.cpp
renderer_opengl/gl_framebuffer_cache.h
renderer_opengl/gl_rasterizer.cpp
renderer_opengl/gl_rasterizer.h
renderer_opengl/gl_resource_manager.cpp
renderer_opengl/gl_resource_manager.h
renderer_opengl/gl_sampler_cache.cpp
renderer_opengl/gl_sampler_cache.h
renderer_opengl/gl_shader_cache.cpp
renderer_opengl/gl_shader_cache.h
renderer_opengl/gl_shader_decompiler.cpp
@@ -113,8 +107,10 @@ add_library(video_core STATIC
renderer_opengl/maxwell_to_gl.h
renderer_opengl/renderer_opengl.cpp
renderer_opengl/renderer_opengl.h
renderer_opengl/utils.cpp
renderer_opengl/utils.h
renderer_opengl/util_shaders.cpp
renderer_opengl/util_shaders.h
renderer_vulkan/blit_image.cpp
renderer_vulkan/blit_image.h
renderer_vulkan/fixed_pipeline_state.cpp
renderer_vulkan/fixed_pipeline_state.h
renderer_vulkan/maxwell_to_vk.cpp
@@ -141,8 +137,6 @@ add_library(video_core STATIC
renderer_vulkan/vk_fence_manager.h
renderer_vulkan/vk_graphics_pipeline.cpp
renderer_vulkan/vk_graphics_pipeline.h
renderer_vulkan/vk_image.cpp
renderer_vulkan/vk_image.h
renderer_vulkan/vk_master_semaphore.cpp
renderer_vulkan/vk_master_semaphore.h
renderer_vulkan/vk_memory_manager.cpp
@@ -153,12 +147,8 @@ add_library(video_core STATIC
renderer_vulkan/vk_query_cache.h
renderer_vulkan/vk_rasterizer.cpp
renderer_vulkan/vk_rasterizer.h
renderer_vulkan/vk_renderpass_cache.cpp
renderer_vulkan/vk_renderpass_cache.h
renderer_vulkan/vk_resource_pool.cpp
renderer_vulkan/vk_resource_pool.h
renderer_vulkan/vk_sampler_cache.cpp
renderer_vulkan/vk_sampler_cache.h
renderer_vulkan/vk_scheduler.cpp
renderer_vulkan/vk_scheduler.h
renderer_vulkan/vk_shader_decompiler.cpp
@@ -179,8 +169,6 @@ add_library(video_core STATIC
renderer_vulkan/vk_update_descriptor.h
renderer_vulkan/wrapper.cpp
renderer_vulkan/wrapper.h
sampler_cache.cpp
sampler_cache.h
shader_cache.h
shader_notify.cpp
shader_notify.h
@@ -237,19 +225,32 @@ add_library(video_core STATIC
shader/transform_feedback.h
surface.cpp
surface.h
texture_cache/accelerated_swizzle.cpp
texture_cache/accelerated_swizzle.h
texture_cache/decode_bc4.cpp
texture_cache/decode_bc4.h
texture_cache/descriptor_table.h
texture_cache/formatter.cpp
texture_cache/formatter.h
texture_cache/format_lookup_table.cpp
texture_cache/format_lookup_table.h
texture_cache/surface_base.cpp
texture_cache/surface_base.h
texture_cache/surface_params.cpp
texture_cache/surface_params.h
texture_cache/surface_view.cpp
texture_cache/surface_view.h
texture_cache/image_base.cpp
texture_cache/image_base.h
texture_cache/image_info.cpp
texture_cache/image_info.h
texture_cache/image_view_base.cpp
texture_cache/image_view_base.h
texture_cache/image_view_info.cpp
texture_cache/image_view_info.h
texture_cache/render_targets.h
texture_cache/samples_helper.h
texture_cache/slot_vector.h
texture_cache/texture_cache.h
texture_cache/types.h
texture_cache/util.cpp
texture_cache/util.h
textures/astc.cpp
textures/astc.h
textures/convert.cpp
textures/convert.h
textures/decoders.cpp
textures/decoders.h
textures/texture.cpp

View File

@@ -118,20 +118,17 @@ public:
/// Prepares the buffer cache for data uploading
/// @param max_size Maximum number of bytes that will be uploaded
/// @return True when a stream buffer invalidation was required, false otherwise
bool Map(std::size_t max_size) {
void Map(std::size_t max_size) {
std::lock_guard lock{mutex};
bool invalidated;
std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
std::tie(buffer_ptr, buffer_offset_base) = stream_buffer.Map(max_size, 4);
buffer_offset = buffer_offset_base;
return invalidated;
}
/// Finishes the upload stream
void Unmap() {
std::lock_guard lock{mutex};
stream_buffer->Unmap(buffer_offset - buffer_offset_base);
stream_buffer.Unmap(buffer_offset - buffer_offset_base);
}
/// Function called at the end of each frame, intended for deferred operations
@@ -261,9 +258,9 @@ public:
protected:
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
std::unique_ptr<StreamBuffer> stream_buffer_)
StreamBuffer& stream_buffer_)
: rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_},
stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {}
stream_buffer{stream_buffer_} {}
~BufferCache() = default;
@@ -441,7 +438,7 @@ private:
buffer_ptr += size;
buffer_offset += size;
return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};
return BufferInfo{stream_buffer.Handle(), uploaded_offset, stream_buffer.Address()};
}
void AlignBuffer(std::size_t alignment) {
@@ -567,9 +564,7 @@ private:
VideoCore::RasterizerInterface& rasterizer;
Tegra::MemoryManager& gpu_memory;
Core::Memory::Memory& cpu_memory;
std::unique_ptr<StreamBuffer> stream_buffer;
BufferType stream_buffer_handle;
StreamBuffer& stream_buffer;
u8* buffer_ptr = nullptr;
u64 buffer_offset = 0;
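
With the change above the buffer cache borrows its stream buffer by reference and Map no longer reports whether an invalidation occurred, so callers simply map, copy and unmap. A toy sketch of that flow; the vector-backed StreamBuffer below is a stand-in, not the renderer's real class.

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <utility>
#include <vector>

class StreamBuffer {
public:
    explicit StreamBuffer(std::size_t size) : memory(size) {}

    // Returns the write pointer and the base offset for this mapping.
    std::pair<std::uint8_t*, std::size_t> Map(std::size_t max_size, std::size_t alignment) {
        offset = (offset + alignment - 1) & ~(alignment - 1);
        if (offset + max_size > memory.size()) {
            offset = 0; // wrap around (the real buffer also invalidates here)
        }
        return {memory.data() + offset, offset};
    }

    void Unmap(std::size_t used) {
        offset += used;
    }

private:
    std::vector<std::uint8_t> memory;
    std::size_t offset = 0;
};

class UploadPass {
public:
    explicit UploadPass(StreamBuffer& stream_buffer_) : stream_buffer{stream_buffer_} {}

    std::size_t Upload(const void* data, std::size_t size) {
        auto [pointer, base_offset] = stream_buffer.Map(size, 4);
        std::memcpy(pointer, data, size);
        stream_buffer.Unmap(size);
        return base_offset; // where the data landed inside the stream buffer
    }

private:
    StreamBuffer& stream_buffer; // borrowed, not owned
};

int main() {
    StreamBuffer buffer(1024);
    UploadPass pass(buffer);
    const std::uint32_t value = 0xCAFEBABE;
    return pass.Upload(&value, sizeof(value)) == 0 ? 0 : 1;
}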

View File

@@ -9,7 +9,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/texture_cache/surface_params.h"
#include "video_core/textures/decoders.h"
extern "C" {
#include <libswscale/swscale.h>
@@ -53,7 +53,7 @@ void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) {
void Vic::Execute() {
if (output_surface_luma_address == 0) {
LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Recieved 0x{:X}",
LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Received 0x{:X}",
vic_state.output_surface.luma_offset);
return;
}
@@ -105,9 +105,9 @@ void Vic::Execute() {
const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1,
block_height, 0);
std::vector<u8> swizzled_data(size);
Tegra::Texture::CopySwizzledData(frame->width, frame->height, 1, 4, 4,
swizzled_data.data(), converted_frame_buffer.get(),
false, block_height, 0, 1);
Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4,
frame->width, 4, swizzled_data.data(),
converted_frame_buffer.get(), block_height, 0, 0);
gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);
gpu.Maxwell3D().OnMemoryWrite();

View File

@@ -3,9 +3,9 @@
// Refer to the license.txt file included.
#include <array>
#include <bitset>
#include <cstddef>
#include "common/common_types.h"
#include "video_core/compatible_formats.h"
#include "video_core/surface.h"
@@ -13,23 +13,25 @@ namespace VideoCore::Surface {
namespace {
using Table = std::array<std::array<u64, 2>, MaxPixelFormat>;
// Compatibility table taken from Table 3.X.2 in:
// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt
constexpr std::array VIEW_CLASS_128_BITS = {
constexpr std::array VIEW_CLASS_128_BITS{
PixelFormat::R32G32B32A32_FLOAT,
PixelFormat::R32G32B32A32_UINT,
PixelFormat::R32G32B32A32_SINT,
};
constexpr std::array VIEW_CLASS_96_BITS = {
constexpr std::array VIEW_CLASS_96_BITS{
PixelFormat::R32G32B32_FLOAT,
};
// Missing formats:
// PixelFormat::RGB32UI,
// PixelFormat::RGB32I,
constexpr std::array VIEW_CLASS_64_BITS = {
constexpr std::array VIEW_CLASS_64_BITS{
PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT,
PixelFormat::R32G32_SINT, PixelFormat::R16G16B16A16_FLOAT,
PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
@@ -38,7 +40,7 @@ constexpr std::array VIEW_CLASS_64_BITS = {
// TODO: How should we handle 48 bits?
constexpr std::array VIEW_CLASS_32_BITS = {
constexpr std::array VIEW_CLASS_32_BITS{
PixelFormat::R16G16_FLOAT, PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT,
PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT,
PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM,
@@ -50,43 +52,105 @@ constexpr std::array VIEW_CLASS_32_BITS = {
// TODO: How should we handle 24 bits?
constexpr std::array VIEW_CLASS_16_BITS = {
constexpr std::array VIEW_CLASS_16_BITS{
PixelFormat::R16_FLOAT, PixelFormat::R8G8_UINT, PixelFormat::R16_UINT,
PixelFormat::R16_SINT, PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM,
PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM, PixelFormat::R8G8_SINT,
};
constexpr std::array VIEW_CLASS_8_BITS = {
constexpr std::array VIEW_CLASS_8_BITS{
PixelFormat::R8_UINT,
PixelFormat::R8_UNORM,
PixelFormat::R8_SINT,
PixelFormat::R8_SNORM,
};
constexpr std::array VIEW_CLASS_RGTC1_RED = {
constexpr std::array VIEW_CLASS_RGTC1_RED{
PixelFormat::BC4_UNORM,
PixelFormat::BC4_SNORM,
};
constexpr std::array VIEW_CLASS_RGTC2_RG = {
constexpr std::array VIEW_CLASS_RGTC2_RG{
PixelFormat::BC5_UNORM,
PixelFormat::BC5_SNORM,
};
constexpr std::array VIEW_CLASS_BPTC_UNORM = {
constexpr std::array VIEW_CLASS_BPTC_UNORM{
PixelFormat::BC7_UNORM,
PixelFormat::BC7_SRGB,
};
constexpr std::array VIEW_CLASS_BPTC_FLOAT = {
constexpr std::array VIEW_CLASS_BPTC_FLOAT{
PixelFormat::BC6H_SFLOAT,
PixelFormat::BC6H_UFLOAT,
};
constexpr std::array VIEW_CLASS_ASTC_4x4_RGBA{
PixelFormat::ASTC_2D_4X4_UNORM,
PixelFormat::ASTC_2D_4X4_SRGB,
};
constexpr std::array VIEW_CLASS_ASTC_5x4_RGBA{
PixelFormat::ASTC_2D_5X4_UNORM,
PixelFormat::ASTC_2D_5X4_SRGB,
};
constexpr std::array VIEW_CLASS_ASTC_5x5_RGBA{
PixelFormat::ASTC_2D_5X5_UNORM,
PixelFormat::ASTC_2D_5X5_SRGB,
};
constexpr std::array VIEW_CLASS_ASTC_6x5_RGBA{
PixelFormat::ASTC_2D_6X5_UNORM,
PixelFormat::ASTC_2D_6X5_SRGB,
};
constexpr std::array VIEW_CLASS_ASTC_6x6_RGBA{
PixelFormat::ASTC_2D_6X6_UNORM,
PixelFormat::ASTC_2D_6X6_SRGB,
};
constexpr std::array VIEW_CLASS_ASTC_8x5_RGBA{
PixelFormat::ASTC_2D_8X5_UNORM,
PixelFormat::ASTC_2D_8X5_SRGB,
};
constexpr std::array VIEW_CLASS_ASTC_8x8_RGBA{
PixelFormat::ASTC_2D_8X8_UNORM,
PixelFormat::ASTC_2D_8X8_SRGB,
};
// Missing formats:
// PixelFormat::ASTC_2D_10X5_UNORM
// PixelFormat::ASTC_2D_10X5_SRGB
// Missing formats:
// PixelFormat::ASTC_2D_10X6_UNORM
// PixelFormat::ASTC_2D_10X6_SRGB
constexpr std::array VIEW_CLASS_ASTC_10x8_RGBA{
PixelFormat::ASTC_2D_10X8_UNORM,
PixelFormat::ASTC_2D_10X8_SRGB,
};
constexpr std::array VIEW_CLASS_ASTC_10x10_RGBA{
PixelFormat::ASTC_2D_10X10_UNORM,
PixelFormat::ASTC_2D_10X10_SRGB,
};
// Missing formats
// ASTC_2D_12X10_UNORM,
// ASTC_2D_12X10_SRGB,
constexpr std::array VIEW_CLASS_ASTC_12x12_RGBA{
PixelFormat::ASTC_2D_12X12_UNORM,
PixelFormat::ASTC_2D_12X12_SRGB,
};
// Compatibility table taken from Table 4.X.1 in:
// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt
constexpr std::array COPY_CLASS_128_BITS = {
constexpr std::array COPY_CLASS_128_BITS{
PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT,
PixelFormat::BC2_UNORM, PixelFormat::BC2_SRGB, PixelFormat::BC3_UNORM,
PixelFormat::BC3_SRGB, PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM,
@@ -97,7 +161,7 @@ constexpr std::array COPY_CLASS_128_BITS = {
// PixelFormat::RGBA32I
// COMPRESSED_RG_RGTC2
constexpr std::array COPY_CLASS_64_BITS = {
constexpr std::array COPY_CLASS_64_BITS{
PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT,
PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
PixelFormat::R16G16B16A16_SINT, PixelFormat::R32G32_UINT,
@@ -110,32 +174,36 @@ constexpr std::array COPY_CLASS_64_BITS = {
// COMPRESSED_RGBA_S3TC_DXT1_EXT
// COMPRESSED_SIGNED_RED_RGTC1
void Enable(FormatCompatibility::Table& compatiblity, size_t format_a, size_t format_b) {
compatiblity[format_a][format_b] = true;
compatiblity[format_b][format_a] = true;
constexpr void Enable(Table& table, size_t format_a, size_t format_b) {
table[format_a][format_b / 64] |= u64(1) << (format_b % 64);
table[format_b][format_a / 64] |= u64(1) << (format_a % 64);
}
void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) {
Enable(compatibility, static_cast<size_t>(format_a), static_cast<size_t>(format_b));
constexpr void Enable(Table& table, PixelFormat format_a, PixelFormat format_b) {
Enable(table, static_cast<size_t>(format_a), static_cast<size_t>(format_b));
}
template <typename Range>
void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) {
constexpr void EnableRange(Table& table, const Range& range) {
for (auto it_a = range.begin(); it_a != range.end(); ++it_a) {
for (auto it_b = it_a; it_b != range.end(); ++it_b) {
Enable(compatibility, *it_a, *it_b);
Enable(table, *it_a, *it_b);
}
}
}
} // Anonymous namespace
constexpr bool IsSupported(const Table& table, PixelFormat format_a, PixelFormat format_b) {
const size_t a = static_cast<size_t>(format_a);
const size_t b = static_cast<size_t>(format_b);
return ((table[a][b / 64] >> (b % 64)) & 1) != 0;
}
FormatCompatibility::FormatCompatibility() {
constexpr Table MakeViewTable() {
Table view{};
for (size_t i = 0; i < MaxPixelFormat; ++i) {
// Identity is allowed
Enable(view, i, i);
}
EnableRange(view, VIEW_CLASS_128_BITS);
EnableRange(view, VIEW_CLASS_96_BITS);
EnableRange(view, VIEW_CLASS_64_BITS);
@@ -146,10 +214,36 @@ FormatCompatibility::FormatCompatibility() {
EnableRange(view, VIEW_CLASS_RGTC2_RG);
EnableRange(view, VIEW_CLASS_BPTC_UNORM);
EnableRange(view, VIEW_CLASS_BPTC_FLOAT);
EnableRange(view, VIEW_CLASS_ASTC_4x4_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_5x4_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_5x5_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_6x5_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_6x6_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_8x5_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_8x8_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_10x8_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA);
EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA);
return view;
}
copy = view;
constexpr Table MakeCopyTable() {
Table copy = MakeViewTable();
EnableRange(copy, COPY_CLASS_128_BITS);
EnableRange(copy, COPY_CLASS_64_BITS);
return copy;
}
} // Anonymous namespace
bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b) {
static constexpr Table TABLE = MakeViewTable();
return IsSupported(TABLE, format_a, format_b);
}
bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b) {
static constexpr Table TABLE = MakeCopyTable();
return IsSupported(TABLE, format_a, format_b);
}
} // namespace VideoCore::Surface
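
compatible_formats now builds its view and copy compatibility tables at compile time, packing one bit per format pair into u64 words. The standalone sketch below applies the same technique to a tiny placeholder Format enum.

#include <array>
#include <cstddef>
#include <cstdint>

enum class Format : std::size_t { A, B, C, D, MaxFormat };
constexpr std::size_t MAX_FORMAT = static_cast<std::size_t>(Format::MaxFormat);

// One row per format; each row packs MAX_FORMAT bits into 64-bit words.
using Table = std::array<std::array<std::uint64_t, (MAX_FORMAT + 63) / 64>, MAX_FORMAT>;

constexpr void Enable(Table& table, Format a, Format b) {
    const auto ia = static_cast<std::size_t>(a);
    const auto ib = static_cast<std::size_t>(b);
    table[ia][ib / 64] |= std::uint64_t{1} << (ib % 64);
    table[ib][ia / 64] |= std::uint64_t{1} << (ia % 64);
}

constexpr Table MakeTable() {
    Table table{};
    for (std::size_t i = 0; i < MAX_FORMAT; ++i) {
        table[i][i / 64] |= std::uint64_t{1} << (i % 64); // identity is always compatible
    }
    Enable(table, Format::A, Format::B); // A and B share a (made-up) view class
    return table;
}

constexpr Table TABLE = MakeTable();

constexpr bool IsCompatible(Format a, Format b) {
    const auto ia = static_cast<std::size_t>(a);
    const auto ib = static_cast<std::size_t>(b);
    return ((TABLE[ia][ib / 64] >> (ib % 64)) & 1) != 0;
}

static_assert(IsCompatible(Format::A, Format::B));
static_assert(!IsCompatible(Format::A, Format::C));

int main() {
    return IsCompatible(Format::B, Format::A) ? 0 : 1;
}

Plain u64 words keep the whole table constructible in a constexpr context, which is presumably why the std::bitset rows were replaced.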

View File

@@ -4,31 +4,12 @@
#pragma once
#include <array>
#include <bitset>
#include <cstddef>
#include "video_core/surface.h"
namespace VideoCore::Surface {
class FormatCompatibility {
public:
using Table = std::array<std::bitset<MaxPixelFormat>, MaxPixelFormat>;
bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b);
explicit FormatCompatibility();
bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept {
return view[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
}
bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept {
return copy[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
}
private:
Table view;
Table copy;
};
bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b);
} // namespace VideoCore::Surface

View File

@@ -0,0 +1,32 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstddef>
#include <utility>
#include <vector>
namespace VideoCommon {
/// Container to push objects to be destroyed a few ticks in the future
template <typename T, size_t TICKS_TO_DESTROY>
class DelayedDestructionRing {
public:
void Tick() {
index = (index + 1) % TICKS_TO_DESTROY;
elements[index].clear();
}
void Push(T&& object) {
elements[index].push_back(std::move(object));
}
private:
size_t index = 0;
std::array<std::vector<T>, TICKS_TO_DESTROY> elements;
};
} // namespace VideoCommon
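
DelayedDestructionRing above keeps anything pushed into it alive for TICKS_TO_DESTROY calls to Tick() before the vector holding it is cleared, deferring destruction by a fixed number of frames. The usage sketch below inlines a local copy of the ring so it compiles on its own; the Resource type is illustrative.

#include <array>
#include <cstddef>
#include <cstdio>
#include <memory>
#include <utility>
#include <vector>

// Local copy of the ring above so the sketch stands alone.
template <typename T, std::size_t TICKS_TO_DESTROY>
class DelayedDestructionRing {
public:
    void Tick() {
        index = (index + 1) % TICKS_TO_DESTROY;
        elements[index].clear();
    }
    void Push(T&& object) {
        elements[index].push_back(std::move(object));
    }

private:
    std::size_t index = 0;
    std::array<std::vector<T>, TICKS_TO_DESTROY> elements;
};

struct Resource {
    ~Resource() {
        std::puts("resource destroyed");
    }
};

int main() {
    DelayedDestructionRing<std::unique_ptr<Resource>, 3> ring;
    ring.Push(std::make_unique<Resource>());
    ring.Tick(); // tick 1: resource still alive
    ring.Tick(); // tick 2: resource still alive
    ring.Tick(); // tick 3: wraps back to the original slot and destroys it
    return 0;
}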

View File

@@ -16,6 +16,9 @@ namespace VideoCommon::Dirty {
using Tegra::Engines::Maxwell3D;
void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) {
FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors);
FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors);
static constexpr std::size_t num_per_rt = NUM(rt[0]);
static constexpr std::size_t begin = OFF(rt);
static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets;
@@ -23,6 +26,10 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl
FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt);
}
FillBlock(tables[1], begin, num, RenderTargets);
FillBlock(tables[0], OFF(render_area), NUM(render_area), RenderTargets);
tables[0][OFF(rt_control)] = RenderTargets;
tables[1][OFF(rt_control)] = RenderTargetControl;
static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets};
for (std::size_t i = 0; i < std::size(zeta_flags); ++i) {

View File

@@ -16,7 +16,10 @@ namespace VideoCommon::Dirty {
enum : u8 {
NullEntry = 0,
Descriptors,
RenderTargets,
RenderTargetControl,
ColorBuffer0,
ColorBuffer1,
ColorBuffer2,

Some files were not shown because too many files have changed in this diff