core: Move PageTable struct into Common.

Merge pull request #2244 from bunnei/gpu-mem-refactor
video_core: Refactor to use MemoryManager interface for all memory access.
2019-03-16 22:05:40 -04:00 · 2019-03-16 21:59:45 -04:00 · 2019-03-16 21:59:30 -04:00 · 2019-03-16 21:58:59 -04:00 · 2019-03-16 00:43:29 -04:00 · 2019-03-16 00:43:09 -04:00
167 changed files with 5236 additions and 3154 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,7 +24,7 @@ matrix:
    - os: osx
      env: NAME="macos build"
      sudo: false
-      osx_image: xcode10
+      osx_image: xcode10.1
      install: "./.travis/macos/deps.sh"
      script: "./.travis/macos/build.sh"
      after_success: "./.travis/macos/upload.sh"
--- a/.travis/macos/build.sh
+++ b/.travis/macos/build.sh
@@ -2,7 +2,7 @@

 set -o pipefail

-export MACOSX_DEPLOYMENT_TARGET=10.13
+export MACOSX_DEPLOYMENT_TARGET=10.14
 export Qt5_DIR=$(brew --prefix)/opt/qt5
 export UNICORNDIR=$(pwd)/externals/unicorn
 export PATH="/usr/local/opt/ccache/libexec:$PATH"
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -163,12 +163,6 @@ else()
    set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE)
 endif()

-# Fix GCC C++17 and Boost.ICL incompatibility (needed to build dynarmic)
-# See https://bugzilla.redhat.com/show_bug.cgi?id=1485641#c1
-if (CMAKE_COMPILER_IS_GNUCC)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-new-ttp-matching")
-endif()
-
 # Set file offset size to 64 bits.
 #
 # On modern Unixes, this is typically already the case. The lone exception is
@@ -185,9 +179,9 @@ set_property(DIRECTORY APPEND PROPERTY
 # System imported libraries
 # ======================

-find_package(Boost 1.63.0 QUIET)
+find_package(Boost 1.64.0 QUIET)
 if (NOT Boost_FOUND)
-    message(STATUS "Boost 1.63.0 or newer not found, falling back to externals")
+    message(STATUS "Boost 1.64.0 or newer not found, falling back to externals")

    set(BOOST_ROOT "${PROJECT_SOURCE_DIR}/externals/boost")
    set(Boost_NO_SYSTEM_PATHS OFF)
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -73,6 +73,7 @@ set(HASH_FILES
    "${VIDEO_CORE}/shader/decode/integer_set.cpp"
    "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
    "${VIDEO_CORE}/shader/decode/memory.cpp"
+    "${VIDEO_CORE}/shader/decode/texture.cpp"
    "${VIDEO_CORE}/shader/decode/other.cpp"
    "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
    "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ yuzu is an experimental open-source emulator for the Nintendo Switch from the cr

 It is written in C++ with portability in mind, with builds actively maintained for Windows, Linux and macOS. The emulator is currently only useful for homebrew development and research purposes.

-yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success, but does not implement any of the necessary GPU features to render 3D graphics.
+yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success.

 yuzu is licensed under the GPLv2 (or any later version). Refer to the license.txt file included.

--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -12,7 +12,7 @@
 #include "common/ring_buffer.h"
 #include "core/settings.h"

-#ifdef _MSC_VER
+#ifdef _WIN32
 #include <objbase.h>
 #endif

@@ -113,7 +113,7 @@ private:

 CubebSink::CubebSink(std::string_view target_device_name) {
    // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
-#ifdef _MSC_VER
+#ifdef _WIN32
    com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
 #endif

@@ -152,7 +152,7 @@ CubebSink::~CubebSink() {

    cubeb_destroy(ctx);

-#ifdef _MSC_VER
+#ifdef _WIN32
    if (SUCCEEDED(com_init_result)) {
        CoUninitialize();
    }
--- a/src/audio_core/cubeb_sink.h
+++ b/src/audio_core/cubeb_sink.h
@@ -26,7 +26,7 @@ private:
    cubeb_devid output_device{};
    std::vector<SinkStreamPtr> sink_streams;

-#ifdef _MSC_VER
+#ifdef _WIN32
    u32 com_init_result = 0;
 #endif
 };
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -47,6 +47,7 @@ add_custom_command(OUTPUT scm_rev.cpp
      "${VIDEO_CORE}/shader/decode/integer_set.cpp"
      "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
      "${VIDEO_CORE}/shader/decode/memory.cpp"
+      "${VIDEO_CORE}/shader/decode/texture.cpp"
      "${VIDEO_CORE}/shader/decode/other.cpp"
      "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
      "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
@@ -91,10 +92,14 @@ add_library(common STATIC
    logging/text_formatter.cpp
    logging/text_formatter.h
    math_util.h
+    memory_hook.cpp
+    memory_hook.h
    microprofile.cpp
    microprofile.h
    microprofileui.h
    misc.cpp
+    page_table.cpp
+    page_table.h
    param_package.cpp
    param_package.h
    quaternion.h
@@ -113,6 +118,8 @@ add_library(common STATIC
    threadsafe_queue.h
    timer.cpp
    timer.h
+    uint128.cpp
+    uint128.h
    vector_math.h
    web_result.h
 )
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -111,12 +111,6 @@
 template <std::size_t Position, std::size_t Bits, typename T>
 struct BitField {
 private:
-    // We hide the copy assigment operator here, because the default copy
-    // assignment would copy the full storage value, rather than just the bits
-    // relevant to this particular bit field.
-    // We don't delete it because we want BitField to be trivially copyable.
-    constexpr BitField& operator=(const BitField&) = default;
-
    // UnderlyingType is T for non-enum types and the underlying type of T if
    // T is an enumeration. Note that T is wrapped within an enable_if in the
    // former case to workaround compile errors which arise when using
@@ -163,9 +157,13 @@ public:
    BitField(T val) = delete;
    BitField& operator=(T val) = delete;

-    // Force default constructor to be created
-    // so that we can use this within unions
-    constexpr BitField() = default;
+    constexpr BitField() noexcept = default;
+
+    constexpr BitField(const BitField&) noexcept = default;
+    constexpr BitField& operator=(const BitField&) noexcept = default;
+
+    constexpr BitField(BitField&&) noexcept = default;
+    constexpr BitField& operator=(BitField&&) noexcept = default;

    constexpr FORCE_INLINE operator T() const {
        return Value();
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -39,8 +39,10 @@ public:
    Impl(Impl const&) = delete;
    const Impl& operator=(Impl const&) = delete;

-    void PushEntry(Entry e) {
-        message_queue.Push(std::move(e));
+    void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num,
+                   const char* function, std::string message) {
+        message_queue.Push(
+            CreateEntry(log_class, log_level, filename, line_num, function, std::move(message)));
    }

    void AddBackend(std::unique_ptr<Backend> backend) {
@@ -108,11 +110,30 @@ private:
        backend_thread.join();
    }

+    Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
+                      const char* function, std::string message) const {
+        using std::chrono::duration_cast;
+        using std::chrono::steady_clock;
+
+        Entry entry;
+        entry.timestamp =
+            duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
+        entry.log_class = log_class;
+        entry.log_level = log_level;
+        entry.filename = Common::TrimSourcePath(filename);
+        entry.line_num = line_nr;
+        entry.function = function;
+        entry.message = std::move(message);
+
+        return entry;
+    }
+
    std::mutex writing_mutex;
    std::thread backend_thread;
    std::vector<std::unique_ptr<Backend>> backends;
    Common::MPSCQueue<Log::Entry> message_queue;
    Filter filter;
+    std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
 };

 void ConsoleBackend::Write(const Entry& entry) {
@@ -271,25 +292,6 @@ const char* GetLevelName(Level log_level) {
 #undef LVL
 }

-Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
-                  const char* function, std::string message) {
-    using std::chrono::duration_cast;
-    using std::chrono::steady_clock;
-
-    static steady_clock::time_point time_origin = steady_clock::now();
-
-    Entry entry;
-    entry.timestamp = duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
-    entry.log_class = log_class;
-    entry.log_level = log_level;
-    entry.filename = Common::TrimSourcePath(filename);
-    entry.line_num = line_nr;
-    entry.function = function;
-    entry.message = std::move(message);
-
-    return entry;
-}
-
 void SetGlobalFilter(const Filter& filter) {
    Impl::Instance().SetGlobalFilter(filter);
 }
@@ -314,9 +316,7 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
    if (!filter.CheckMessage(log_class, log_level))
        return;

-    Entry entry =
-        CreateEntry(log_class, log_level, filename, line_num, function, fmt::vformat(format, args));
-
-    instance.PushEntry(std::move(entry));
+    instance.PushEntry(log_class, log_level, filename, line_num, function,
+                       fmt::vformat(format, args));
 }
 } // namespace Log
--- a/src/common/logging/backend.h
+++ b/src/common/logging/backend.h
@@ -135,10 +135,6 @@ const char* GetLogClassName(Class log_class);
 */
 const char* GetLevelName(Level log_level);

-/// Creates a log entry by formatting the given source location, and message.
-Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
-                  const char* function, std::string message);
-
 /**
 * The global filter will prevent any messages from even being processed if they are filtered. Each
 * backend can have a filter, but if the level is lower than the global filter, the backend will
--- a/src/common/memory_hook.cpp
+++ b/src/common/memory_hook.cpp
@@ -2,10 +2,10 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include "core/memory_hook.h"
+#include "common/memory_hook.h"

-namespace Memory {
+namespace Common {

 MemoryHook::~MemoryHook() = default;

-} // namespace Memory
+} // namespace Common
--- a/src/common/memory_hook.h
+++ b/src/common/memory_hook.h
@@ -9,7 +9,7 @@

 #include "common/common_types.h"

-namespace Memory {
+namespace Common {

 /**
 * Memory hooks have two purposes:
@@ -44,4 +44,4 @@ public:
 };

 using MemoryHookPointer = std::shared_ptr<MemoryHook>;
-} // namespace Memory
+} // namespace Common
--- a/src/common/page_table.cpp
+++ b/src/common/page_table.cpp
@@ -0,0 +1,29 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/page_table.h"
+
+namespace Common {
+
+PageTable::PageTable(std::size_t page_size_in_bits) : page_size_in_bits{page_size_in_bits} {}
+
+PageTable::~PageTable() = default;
+
+void PageTable::Resize(std::size_t address_space_width_in_bits) {
+    const std::size_t num_page_table_entries = 1ULL
+                                               << (address_space_width_in_bits - page_size_in_bits);
+
+    pointers.resize(num_page_table_entries);
+    attributes.resize(num_page_table_entries);
+
+    // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
+    // vector size is subsequently decreased (via resize), the vector might not automatically
+    // actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for
+    // 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use.
+
+    pointers.shrink_to_fit();
+    attributes.shrink_to_fit();
+}
+
+} // namespace Common
--- a/src/common/page_table.h
+++ b/src/common/page_table.h
@@ -0,0 +1,82 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <set>
+#include <tuple>
+#include <vector>
+#include <boost/icl/interval_map.hpp>
+#include "common/common_types.h"
+#include "common/memory_hook.h"
+
+namespace Common {
+
+enum class PageType : u8 {
+    /// Page is unmapped and should cause an access error.
+    Unmapped,
+    /// Page is mapped to regular memory. This is the only type you can get pointers to.
+    Memory,
+    /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
+    /// invalidation
+    RasterizerCachedMemory,
+    /// Page is mapped to an I/O region. Writing and reading to this page is handled by functions.
+    Special,
+};
+
+struct SpecialRegion {
+    enum class Type {
+        DebugHook,
+        IODevice,
+    } type;
+
+    MemoryHookPointer handler;
+
+    bool operator<(const SpecialRegion& other) const {
+        return std::tie(type, handler) < std::tie(other.type, other.handler);
+    }
+
+    bool operator==(const SpecialRegion& other) const {
+        return std::tie(type, handler) == std::tie(other.type, other.handler);
+    }
+};
+
+/**
+ * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
+ * mimics the way a real CPU page table works.
+ */
+struct PageTable {
+    explicit PageTable(std::size_t page_size_in_bits);
+    ~PageTable();
+
+    /**
+     * Resizes the page table to be able to accommodate enough pages within
+     * a given address space.
+     *
+     * @param address_space_width_in_bits The address size width in bits.
+     */
+    void Resize(std::size_t address_space_width_in_bits);
+
+    /**
+     * Vector of memory pointers backing each page. An entry can only be non-null if the
+     * corresponding entry in the `attributes` vector is of type `Memory`.
+     */
+    std::vector<u8*> pointers;
+
+    /**
+     * Contains MMIO handlers that back memory regions whose entries in the `attributes` vector are
+     * of type `Special`.
+     */
+    boost::icl::interval_map<VAddr, std::set<SpecialRegion>> special_regions;
+
+    /**
+     * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
+     * the corresponding entry in `pointers` MUST be set to null.
+     */
+    std::vector<PageType> attributes;
+
+    const std::size_t page_size_in_bits{};
+};
+
+} // namespace Common
--- a/src/common/uint128.cpp
+++ b/src/common/uint128.cpp
@@ -0,0 +1,41 @@
+#ifdef _MSC_VER
+#include <intrin.h>
+
+#pragma intrinsic(_umul128)
+#endif
+#include <cstring>
+#include "common/uint128.h"
+
+namespace Common {
+
+u128 Multiply64Into128(u64 a, u64 b) {
+    u128 result;
+#ifdef _MSC_VER
+    result[0] = _umul128(a, b, &result[1]);
+#else
+    unsigned __int128 tmp = a;
+    tmp *= b;
+    std::memcpy(&result, &tmp, sizeof(u128));
+#endif
+    return result;
+}
+
+std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
+    u64 remainder = dividend[0] % divisor;
+    u64 accum = dividend[0] / divisor;
+    if (dividend[1] == 0)
+        return {accum, remainder};
+    // dividend[1] / divisor is deliberately dropped: it belongs to quotient bits 64+ (overflow)
+    const u64 first_segment = (dividend[1] % divisor) << 32;
+    accum += (first_segment / divisor) << 32;
+    const u64 second_segment = (first_segment % divisor) << 32;
+    accum += (second_segment / divisor);
+    remainder += second_segment % divisor;
+    if (remainder >= divisor) {
+        accum++;
+        remainder -= divisor;
+    }
+    return {accum, remainder};
+}
+
+} // namespace Common
--- a/src/common/uint128.h
+++ b/src/common/uint128.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include <utility>
+#include "common/common_types.h"
+
+namespace Common {
+
+// Multiplies two u64 values and returns the full 128-bit product (low 64 bits in element 0).
+u128 Multiply64Into128(u64 a, u64 b);
+
+// Divides a u128 by a u32; returns {quotient truncated to its low 64 bits, remainder}.
+std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor);
+
+} // namespace Common
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -217,6 +217,7 @@ add_library(core STATIC
    hle/service/audio/audren_u.h
    hle/service/audio/codecctl.cpp
    hle/service/audio/codecctl.h
+    hle/service/audio/errors.h
    hle/service/audio/hwopus.cpp
    hle/service/audio/hwopus.h
    hle/service/bcat/bcat.cpp
@@ -436,8 +437,6 @@ add_library(core STATIC
    loader/xci.h
    memory.cpp
    memory.h
-    memory_hook.cpp
-    memory_hook.h
    memory_setup.h
    perf_stats.cpp
    perf_stats.h
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -12,6 +12,7 @@
 #include "core/core.h"
 #include "core/core_cpu.h"
 #include "core/core_timing.h"
+#include "core/core_timing_util.h"
 #include "core/gdbstub/gdbstub.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/svc.h"
@@ -119,7 +120,7 @@ public:
        return std::max(parent.core_timing.GetDowncount(), 0);
    }
    u64 GetCNTPCT() override {
-        return parent.core_timing.GetTicks();
+        return Timing::CpuCyclesToClockCycles(parent.core_timing.GetTicks());
    }

    ARM_Dynarmic& parent;
@@ -151,7 +152,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
    config.tpidr_el0 = &cb->tpidr_el0;
    config.dczid_el0 = 4;
    config.ctr_el0 = 0x8444c004;
-    config.cntfrq_el0 = 19200000; // Value from fusee.
+    config.cntfrq_el0 = Timing::CNTFREQ;

    // Unpredictable instructions
    config.define_unpredictable_behaviour = true;
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -12,7 +12,7 @@
 #include "core/arm/exclusive_monitor.h"
 #include "core/arm/unicorn/arm_unicorn.h"

-namespace Memory {
+namespace Common {
 struct PageTable;
 }

@@ -70,7 +70,7 @@ private:
    Timing::CoreTiming& core_timing;
    DynarmicExclusiveMonitor& exclusive_monitor;

-    Memory::PageTable* current_page_table = nullptr;
+    Common::PageTable* current_page_table = nullptr;
 };

 class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -36,7 +36,8 @@
 #include "frontend/applets/software_keyboard.h"
 #include "frontend/applets/web_browser.h"
 #include "video_core/debug_utils/debug_utils.h"
-#include "video_core/gpu.h"
+#include "video_core/gpu_asynch.h"
+#include "video_core/gpu_synch.h"
 #include "video_core/renderer_base.h"
 #include "video_core/video_core.h"

@@ -78,6 +79,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
    return vfs->OpenFile(path, FileSys::Mode::Read);
 }
 struct System::Impl {
+    explicit Impl(System& system) : kernel{system} {}

    Cpu& CurrentCpuCore() {
        return cpu_core_manager.GetCurrentCore();
@@ -95,7 +97,7 @@ struct System::Impl {
        LOG_DEBUG(HW_Memory, "initialized OK");

        core_timing.Initialize();
-        kernel.Initialize(core_timing);
+        kernel.Initialize();

        const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
            std::chrono::system_clock::now().time_since_epoch());
@@ -114,7 +116,7 @@ struct System::Impl {
        if (web_browser == nullptr)
            web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>();

-        auto main_process = Kernel::Process::Create(kernel, "main");
+        auto main_process = Kernel::Process::Create(system, "main");
        kernel.MakeCurrentProcess(main_process.get());

        telemetry_session = std::make_unique<Core::TelemetrySession>();
@@ -128,10 +130,16 @@ struct System::Impl {
            return ResultStatus::ErrorVideoCore;
        }

-        gpu_core = std::make_unique<Tegra::GPU>(system, renderer->Rasterizer());
+        is_powered_on = true;
+
+        if (Settings::values.use_asynchronous_gpu_emulation) {
+            gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer);
+        } else {
+            gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer);
+        }

        cpu_core_manager.Initialize(system);
-        is_powered_on = true;
+
        LOG_DEBUG(Core, "Initialized OK");

        // Reset counters and set time origin to current frame
@@ -182,13 +190,13 @@ struct System::Impl {

    void Shutdown() {
        // Log last frame performance stats
-        auto perf_results = GetAndResetPerfStats();
-        Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
-                             perf_results.emulation_speed * 100.0);
-        Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
-                             perf_results.game_fps);
-        Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
-                             perf_results.frametime * 1000.0);
+        const auto perf_results = GetAndResetPerfStats();
+        telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
+                                    perf_results.emulation_speed * 100.0);
+        telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
+                                    perf_results.game_fps);
+        telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
+                                    perf_results.frametime * 1000.0);

        is_powered_on = false;

@@ -265,7 +273,7 @@ struct System::Impl {
    Core::FrameLimiter frame_limiter;
 };

-System::System() : impl{std::make_unique<Impl>()} {}
+System::System() : impl{std::make_unique<Impl>(*this)} {}
 System::~System() = default;

 Cpu& System::CurrentCpuCore() {
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -293,10 +293,6 @@ inline ARM_Interface& CurrentArmInterface() {
    return System::GetInstance().CurrentArmInterface();
 }

-inline TelemetrySession& Telemetry() {
-    return System::GetInstance().TelemetrySession();
-}
-
 inline Kernel::Process* CurrentProcess() {
    return System::GetInstance().CurrentProcess();
 }
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -11,6 +11,7 @@
 #endif
 #include "core/arm/exclusive_monitor.h"
 #include "core/arm/unicorn/arm_unicorn.h"
+#include "core/core.h"
 #include "core/core_cpu.h"
 #include "core/core_timing.h"
 #include "core/hle/kernel/scheduler.h"
@@ -49,9 +50,9 @@ bool CpuBarrier::Rendezvous() {
    return false;
 }

-Cpu::Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
-         CpuBarrier& cpu_barrier, std::size_t core_index)
-    : cpu_barrier{cpu_barrier}, core_timing{core_timing}, core_index{core_index} {
+Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
+         std::size_t core_index)
+    : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} {
    if (Settings::values.use_cpu_jit) {
 #ifdef ARCHITECTURE_x86_64
        arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index);
@@ -63,7 +64,7 @@ Cpu::Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
        arm_interface = std::make_unique<ARM_Unicorn>(core_timing);
    }

-    scheduler = std::make_unique<Kernel::Scheduler>(*arm_interface);
+    scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface);
 }

 Cpu::~Cpu() = default;
--- a/src/core/core_cpu.h
+++ b/src/core/core_cpu.h
@@ -15,6 +15,10 @@ namespace Kernel {
 class Scheduler;
 }

+namespace Core {
+class System;
+}
+
 namespace Core::Timing {
 class CoreTiming;
 }
@@ -45,8 +49,8 @@ private:

 class Cpu {
 public:
-    Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
-        CpuBarrier& cpu_barrier, std::size_t core_index);
+    Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
+        std::size_t core_index);
    ~Cpu();

    void RunLoop(bool tight_loop = true);
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -7,6 +7,7 @@
 #include <cinttypes>
 #include <limits>
 #include "common/logging/log.h"
+#include "common/uint128.h"

 namespace Core::Timing {

@@ -60,4 +61,9 @@ s64 nsToCycles(u64 ns) {
    return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000;
 }

+u64 CpuCyclesToClockCycles(u64 ticks) {
+    const u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ);
+    return Common::Divide128On32(temporal, static_cast<u32>(BASE_CLOCK_RATE)).first;
+}
+
 } // namespace Core::Timing
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -11,6 +11,7 @@ namespace Core::Timing {
 // The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
 // The exact value used is of course unverified.
 constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked
+constexpr u64 CNTFREQ = 19200000;           // Value from fusee.

 inline s64 msToCycles(int ms) {
    // since ms is int there is no way to overflow
@@ -61,4 +62,6 @@ inline u64 cyclesToMs(s64 cycles) {
    return cycles * 1000 / BASE_CLOCK_RATE;
 }

+u64 CpuCyclesToClockCycles(u64 ticks);
+
 } // namespace Core::Timing
--- a/src/core/cpu_core_manager.cpp
+++ b/src/core/cpu_core_manager.cpp
@@ -27,8 +27,7 @@ void CpuCoreManager::Initialize(System& system) {
    exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size());

    for (std::size_t index = 0; index < cores.size(); ++index) {
-        cores[index] =
-            std::make_unique<Cpu>(system.CoreTiming(), *exclusive_monitor, *barrier, index);
+        cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index);
    }

    // Create threads for CPU cores 1-3, and build thread_to_cpu map
--- a/src/core/hle/ipc.h
+++ b/src/core/hle/ipc.h
@@ -4,10 +4,10 @@

 #pragma once

+#include "common/bit_field.h"
+#include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/swap.h"
-#include "core/hle/kernel/errors.h"
-#include "core/memory.h"

 namespace IPC {

--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -19,9 +19,12 @@
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/server_session.h"
+#include "core/hle/result.h"

 namespace IPC {

+constexpr ResultCode ERR_REMOTE_PROCESS_DEAD{ErrorModule::HIPC, 301};
+
 class RequestHelperBase {
 protected:
    Kernel::HLERequestContext* context = nullptr;
@@ -350,7 +353,7 @@ public:
    template <class T>
    std::shared_ptr<T> PopIpcInterface() {
        ASSERT(context->Session()->IsDomain());
-        ASSERT(context->GetDomainMessageHeader()->input_object_count > 0);
+        ASSERT(context->GetDomainMessageHeader().input_object_count > 0);
        return context->GetDomainRequestHandler<T>(Pop<u32>() - 1);
    }
 };
@@ -362,6 +365,11 @@ inline u32 RequestParser::Pop() {
    return cmdbuf[index++];
 }

+template <>
+inline s32 RequestParser::Pop() {
+    return static_cast<s32>(Pop<u32>());
+}
+
 template <typename T>
 void RequestParser::PopRaw(T& value) {
    std::memcpy(&value, cmdbuf + index, sizeof(T));
@@ -392,6 +400,16 @@ inline u64 RequestParser::Pop() {
    return msw << 32 | lsw;
 }

+template <>
+inline s8 RequestParser::Pop() {
+    return static_cast<s8>(Pop<u8>());
+}
+
+template <>
+inline s16 RequestParser::Pop() {
+    return static_cast<s16>(Pop<u16>());
+}
+
 template <>
 inline s64 RequestParser::Pop() {
    return static_cast<s64>(Pop<u64>());
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -9,6 +9,7 @@
 #include "common/common_types.h"
 #include "core/core.h"
 #include "core/core_cpu.h"
+#include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/process.h"
@@ -17,32 +18,172 @@
 #include "core/hle/result.h"
 #include "core/memory.h"

-namespace Kernel::AddressArbiter {
+namespace Kernel {
+namespace {
+// Wakes up to num_to_wake of the given waiting threads (all of them if num_to_wake <= 0).
+void WakeThreads(const std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
+    // A positive num_to_wake caps how many threads are woken, but it is clamped to the number
+    // of waiters so the loop below can never index past the end of the vector.
+    std::size_t last = waiting_threads.size();
+    if (num_to_wake > 0 && static_cast<std::size_t>(num_to_wake) < last) {
+        last = static_cast<std::size_t>(num_to_wake);
+    }

-// Performs actual address waiting logic.
-static ResultCode WaitForAddress(VAddr address, s64 timeout) {
-    SharedPtr<Thread> current_thread = GetCurrentThread();
+    // Signal the waiting threads.
+    for (std::size_t i = 0; i < last; i++) {
+        ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb);
+        waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
+        waiting_threads[i]->SetArbiterWaitAddress(0);
+        waiting_threads[i]->ResumeFromWait();
+    }
+}
+} // Anonymous namespace
+
+AddressArbiter::AddressArbiter(Core::System& system) : system{system} {}
+AddressArbiter::~AddressArbiter() = default;
+
+ResultCode AddressArbiter::SignalToAddress(VAddr address, SignalType type, s32 value,
+                                           s32 num_to_wake) {
+    switch (type) {
+    case SignalType::Signal:
+        return SignalToAddressOnly(address, num_to_wake);
+    case SignalType::IncrementAndSignalIfEqual:
+        return IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
+    case SignalType::ModifyByWaitingCountAndSignalIfEqual:
+        return ModifyByWaitingCountAndSignalToAddressIfEqual(address, value, num_to_wake);
+    default:
+        return ERR_INVALID_ENUM_VALUE;
+    }
+}
+
+ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) {
+    const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
+    WakeThreads(waiting_threads, num_to_wake);
+    return RESULT_SUCCESS;
+}
+
+ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value,
+                                                              s32 num_to_wake) {
+    // Ensure that we can write to the address.
+    if (!Memory::IsValidVirtualAddress(address)) {
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    if (static_cast<s32>(Memory::Read32(address)) != value) {
+        return ERR_INVALID_STATE;
+    }
+    // Increment as u32 so that value == INT32_MAX wraps instead of overflowing a signed int.
+    Memory::Write32(address, static_cast<u32>(value) + 1);
+    return SignalToAddressOnly(address, num_to_wake);
+}
+
+ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
+                                                                         s32 num_to_wake) {
+    // Ensure that we can write to the address.
+    if (!Memory::IsValidVirtualAddress(address)) {
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    // Get threads waiting on the address.
+    const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
+
+    // Determine the modified value depending on the waiting count. The arithmetic is done on
+    // u32 so that stepping past INT32_MIN / INT32_MAX wraps instead of overflowing a signed int.
+    u32 updated_value = static_cast<u32>(value);
+    if (waiting_threads.empty()) {
+        --updated_value;
+    } else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
+        ++updated_value;
+    }
+
+    // Only commit the update if the value in memory still matches the expected one.
+    if (static_cast<s32>(Memory::Read32(address)) != value) {
+        return ERR_INVALID_STATE;
+    }
+
+    Memory::Write32(address, updated_value);
+    WakeThreads(waiting_threads, num_to_wake);
+    return RESULT_SUCCESS;
+}
+
+ResultCode AddressArbiter::WaitForAddress(VAddr address, ArbitrationType type, s32 value,
+                                          s64 timeout_ns) {
+    switch (type) {
+    case ArbitrationType::WaitIfLessThan:
+        return WaitForAddressIfLessThan(address, value, timeout_ns, false);
+    case ArbitrationType::DecrementAndWaitIfLessThan:
+        return WaitForAddressIfLessThan(address, value, timeout_ns, true);
+    case ArbitrationType::WaitIfEqual:
+        return WaitForAddressIfEqual(address, value, timeout_ns);
+    default:
+        return ERR_INVALID_ENUM_VALUE;
+    }
+}
+
+ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
+                                                    bool should_decrement) {
+    // Ensure that we can read the address.
+    if (!Memory::IsValidVirtualAddress(address)) {
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    const s32 cur_value = static_cast<s32>(Memory::Read32(address));
+    if (cur_value >= value) {
+        return ERR_INVALID_STATE;
+    }
+    // Decrement as u32 so that cur_value == INT32_MIN wraps instead of overflowing a signed int.
+    if (should_decrement) {
+        Memory::Write32(address, static_cast<u32>(cur_value) - 1);
+    }
+
+    // Short-circuit without rescheduling, if timeout is zero.
+    if (timeout == 0) {
+        return RESULT_TIMEOUT;
+    }
+
+    return WaitForAddressImpl(address, timeout);
+}
+
+ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
+    // Ensure that we can read the address.
+    if (!Memory::IsValidVirtualAddress(address)) {
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+    // Only wait for the address if equal.
+    if (static_cast<s32>(Memory::Read32(address)) != value) {
+        return ERR_INVALID_STATE;
+    }
+    // Short-circuit without rescheduling, if timeout is zero.
+    if (timeout == 0) {
+        return RESULT_TIMEOUT;
+    }
+
+    return WaitForAddressImpl(address, timeout);
+}
+
+ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
+    SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread();
    current_thread->SetArbiterWaitAddress(address);
    current_thread->SetStatus(ThreadStatus::WaitArb);
    current_thread->InvalidateWakeupCallback();

    current_thread->WakeAfterDelay(timeout);

-    Core::System::GetInstance().CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
+    system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
    return RESULT_TIMEOUT;
 }

-// Gets the threads waiting on an address.
-static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) {
-    const auto RetrieveWaitingThreads = [](std::size_t core_index,
-                                           std::vector<SharedPtr<Thread>>& waiting_threads,
-                                           VAddr arb_addr) {
-        const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
+std::vector<SharedPtr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) const {
+    const auto RetrieveWaitingThreads = [this](std::size_t core_index,
+                                               std::vector<SharedPtr<Thread>>& waiting_threads,
+                                               VAddr arb_addr) {
+        const auto& scheduler = system.Scheduler(core_index);
        const auto& thread_list = scheduler.GetThreadList();

        for (const auto& thread : thread_list) {
-            if (thread->GetArbiterWaitAddress() == arb_addr)
+            if (thread->GetArbiterWaitAddress() == arb_addr) {
                waiting_threads.push_back(thread);
+            }
        }
    };

@@ -61,118 +202,4 @@ static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address)

    return threads;
 }
-
-// Wake up num_to_wake (or all) threads in a vector.
-static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
-    // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
-    // them all.
-    std::size_t last = waiting_threads.size();
-    if (num_to_wake > 0)
-        last = num_to_wake;
-
-    // Signal the waiting threads.
-    for (std::size_t i = 0; i < last; i++) {
-        ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb);
-        waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
-        waiting_threads[i]->SetArbiterWaitAddress(0);
-        waiting_threads[i]->ResumeFromWait();
-    }
-}
-
-// Signals an address being waited on.
-ResultCode SignalToAddress(VAddr address, s32 num_to_wake) {
-    std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
-
-    WakeThreads(waiting_threads, num_to_wake);
-    return RESULT_SUCCESS;
-}
-
-// Signals an address being waited on and increments its value if equal to the value argument.
-ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake) {
-    // Ensure that we can write to the address.
-    if (!Memory::IsValidVirtualAddress(address)) {
-        return ERR_INVALID_ADDRESS_STATE;
-    }
-
-    if (static_cast<s32>(Memory::Read32(address)) == value) {
-        Memory::Write32(address, static_cast<u32>(value + 1));
-    } else {
-        return ERR_INVALID_STATE;
-    }
-
-    return SignalToAddress(address, num_to_wake);
-}
-
-// Signals an address being waited on and modifies its value based on waiting thread count if equal
-// to the value argument.
-ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
-                                                         s32 num_to_wake) {
-    // Ensure that we can write to the address.
-    if (!Memory::IsValidVirtualAddress(address)) {
-        return ERR_INVALID_ADDRESS_STATE;
-    }
-
-    // Get threads waiting on the address.
-    std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
-
-    // Determine the modified value depending on the waiting count.
-    s32 updated_value;
-    if (waiting_threads.empty()) {
-        updated_value = value - 1;
-    } else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
-        updated_value = value + 1;
-    } else {
-        updated_value = value;
-    }
-
-    if (static_cast<s32>(Memory::Read32(address)) == value) {
-        Memory::Write32(address, static_cast<u32>(updated_value));
-    } else {
-        return ERR_INVALID_STATE;
-    }
-
-    WakeThreads(waiting_threads, num_to_wake);
-    return RESULT_SUCCESS;
-}
-
-// Waits on an address if the value passed is less than the argument value, optionally decrementing.
-ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement) {
-    // Ensure that we can read the address.
-    if (!Memory::IsValidVirtualAddress(address)) {
-        return ERR_INVALID_ADDRESS_STATE;
-    }
-
-    s32 cur_value = static_cast<s32>(Memory::Read32(address));
-    if (cur_value < value) {
-        if (should_decrement) {
-            Memory::Write32(address, static_cast<u32>(cur_value - 1));
-        }
-    } else {
-        return ERR_INVALID_STATE;
-    }
-    // Short-circuit without rescheduling, if timeout is zero.
-    if (timeout == 0) {
-        return RESULT_TIMEOUT;
-    }
-
-    return WaitForAddress(address, timeout);
-}
-
-// Waits on an address if the value passed is equal to the argument value.
-ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
-    // Ensure that we can read the address.
-    if (!Memory::IsValidVirtualAddress(address)) {
-        return ERR_INVALID_ADDRESS_STATE;
-    }
-    // Only wait for the address if equal.
-    if (static_cast<s32>(Memory::Read32(address)) != value) {
-        return ERR_INVALID_STATE;
-    }
-    // Short-circuit without rescheduling, if timeout is zero.
-    if (timeout == 0) {
-        return RESULT_TIMEOUT;
-    }
-
-    return WaitForAddress(address, timeout);
-}
-} // namespace Kernel::AddressArbiter
+} // namespace Kernel
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -4,29 +4,77 @@

 #pragma once

+#include <vector>
+
 #include "common/common_types.h"
+#include "core/hle/kernel/object.h"

 union ResultCode;

-namespace Kernel::AddressArbiter {
+namespace Core {
+class System;
+}

-enum class ArbitrationType {
-    WaitIfLessThan = 0,
-    DecrementAndWaitIfLessThan = 1,
-    WaitIfEqual = 2,
+namespace Kernel {
+
+class Thread;
+
+class AddressArbiter {
+public:
+    enum class ArbitrationType {
+        WaitIfLessThan = 0,
+        DecrementAndWaitIfLessThan = 1,
+        WaitIfEqual = 2,
+    };
+
+    enum class SignalType {
+        Signal = 0,
+        IncrementAndSignalIfEqual = 1,
+        ModifyByWaitingCountAndSignalIfEqual = 2,
+    };
+
+    explicit AddressArbiter(Core::System& system);
+    ~AddressArbiter();
+
+    AddressArbiter(const AddressArbiter&) = delete;
+    AddressArbiter& operator=(const AddressArbiter&) = delete;
+
+    AddressArbiter(AddressArbiter&&) = default;
+    AddressArbiter& operator=(AddressArbiter&&) = delete;
+
+    /// Signals an address being waited on with a particular signaling type.
+    ResultCode SignalToAddress(VAddr address, SignalType type, s32 value, s32 num_to_wake);
+
+    /// Waits on an address with a particular arbitration type.
+    ResultCode WaitForAddress(VAddr address, ArbitrationType type, s32 value, s64 timeout_ns);
+
+private:
+    /// Signals an address being waited on.
+    ResultCode SignalToAddressOnly(VAddr address, s32 num_to_wake);
+
+    /// Signals an address being waited on and increments its value if equal to the value argument.
+    ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
+
+    /// Signals an address being waited on and modifies its value based on waiting thread count if
+    /// equal to the value argument.
+    ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
+                                                             s32 num_to_wake);
+
+    /// Waits on an address if the value passed is less than the argument value,
+    /// optionally decrementing.
+    ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
+                                        bool should_decrement);
+
+    /// Waits on an address if the value passed is equal to the argument value.
+    ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
+
+    // Waits on the given address with a timeout in nanoseconds
+    ResultCode WaitForAddressImpl(VAddr address, s64 timeout);
+
+    // Gets the threads waiting on an address.
+    std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
+
+    Core::System& system;
 };

-enum class SignalType {
-    Signal = 0,
-    IncrementAndSignalIfEqual = 1,
-    ModifyByWaitingCountAndSignalIfEqual = 2,
-};
-
-ResultCode SignalToAddress(VAddr address, s32 num_to_wake);
-ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
-ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
-
-ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement);
-ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
-
-} // namespace Kernel::AddressArbiter
+} // namespace Kernel
--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -33,10 +33,11 @@ ResultVal<SharedPtr<ClientSession>> ClientPort::Connect() {
    // Create a new session pair, let the created sessions inherit the parent port's HLE handler.
    auto sessions = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this);

-    if (server_port->hle_handler)
-        server_port->hle_handler->ClientConnected(std::get<SharedPtr<ServerSession>>(sessions));
-    else
-        server_port->pending_sessions.push_back(std::get<SharedPtr<ServerSession>>(sessions));
+    if (server_port->HasHLEHandler()) {
+        server_port->GetHLEHandler()->ClientConnected(std::get<SharedPtr<ServerSession>>(sessions));
+    } else {
+        server_port->AppendPendingSession(std::get<SharedPtr<ServerSession>>(sessions));
+    }

    // Wake the threads waiting on the ServerPort
    server_port->WakeupAllWaitingThreads();
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -17,21 +17,11 @@ ClientSession::~ClientSession() {
    // This destructor will be called automatically when the last ClientSession handle is closed by
    // the emulated application.

-    // Local references to ServerSession and SessionRequestHandler are necessary to guarantee they
+    // A local reference to the ServerSession is necessary to guarantee it
    // will be kept alive until after ClientDisconnected() returns.
    SharedPtr<ServerSession> server = parent->server;
    if (server) {
-        std::shared_ptr<SessionRequestHandler> hle_handler = server->hle_handler;
-        if (hle_handler)
-            hle_handler->ClientDisconnected(server);
-
-        // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
-        // their WaitSynchronization result to 0xC920181A.
-
-        // Clean up the list of client threads with pending requests, they are unneeded now that the
-        // client endpoint is closed.
-        server->pending_requesting_threads.clear();
-        server->currently_handling = nullptr;
+        server->ClientDisconnected();
    }

    parent->client = nullptr;
--- a/src/core/hle/kernel/client_session.h
+++ b/src/core/hle/kernel/client_session.h
@@ -36,14 +36,15 @@ public:

    ResultCode SendSyncRequest(SharedPtr<Thread> thread);

-    std::string name; ///< Name of client port (optional)
+private:
+    explicit ClientSession(KernelCore& kernel);
+    ~ClientSession() override;

    /// The parent session, which links to the server endpoint.
    std::shared_ptr<Session> parent;

-private:
-    explicit ClientSession(KernelCore& kernel);
-    ~ClientSession() override;
+    /// Name of the client session (optional)
+    std::string name;
 };

 } // namespace Kernel
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -86,7 +86,7 @@ HLERequestContext::~HLERequestContext() = default;
 void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf,
                                           bool incoming) {
    IPC::RequestParser rp(src_cmdbuf);
-    command_header = std::make_shared<IPC::CommandHeader>(rp.PopRaw<IPC::CommandHeader>());
+    command_header = rp.PopRaw<IPC::CommandHeader>();

    if (command_header->type == IPC::CommandType::Close) {
        // Close does not populate the rest of the IPC header
@@ -95,8 +95,7 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_

    // If handle descriptor is present, add size of it
    if (command_header->enable_handle_descriptor) {
-        handle_descriptor_header =
-            std::make_shared<IPC::HandleDescriptorHeader>(rp.PopRaw<IPC::HandleDescriptorHeader>());
+        handle_descriptor_header = rp.PopRaw<IPC::HandleDescriptorHeader>();
        if (handle_descriptor_header->send_current_pid) {
            rp.Skip(2, false);
        }
@@ -140,16 +139,15 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
        // If this is an incoming message, only CommandType "Request" has a domain header
        // All outgoing domain messages have the domain header, if only incoming has it
        if (incoming || domain_message_header) {
-            domain_message_header =
-                std::make_shared<IPC::DomainMessageHeader>(rp.PopRaw<IPC::DomainMessageHeader>());
+            domain_message_header = rp.PopRaw<IPC::DomainMessageHeader>();
        } else {
-            if (Session()->IsDomain())
+            if (Session()->IsDomain()) {
                LOG_WARNING(IPC, "Domain request has no DomainMessageHeader!");
+            }
        }
    }

-    data_payload_header =
-        std::make_shared<IPC::DataPayloadHeader>(rp.PopRaw<IPC::DataPayloadHeader>());
+    data_payload_header = rp.PopRaw<IPC::DataPayloadHeader>();

    data_payload_offset = rp.GetCurrentOffset();

@@ -264,11 +262,11 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
        // Write the domain objects to the command buffer, these go after the raw untranslated data.
        // TODO(Subv): This completely ignores C buffers.
        std::size_t domain_offset = size - domain_message_header->num_objects;
-        auto& request_handlers = server_session->domain_request_handlers;

-        for (auto& object : domain_objects) {
-            request_handlers.emplace_back(object);
-            dst_cmdbuf[domain_offset++] = static_cast<u32_le>(request_handlers.size());
+        for (const auto& object : domain_objects) {
+            server_session->AppendDomainRequestHandler(object);
+            dst_cmdbuf[domain_offset++] =
+                static_cast<u32_le>(server_session->NumDomainRequestHandlers());
        }
    }

--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -6,6 +6,7 @@

 #include <array>
 #include <memory>
+#include <optional>
 #include <string>
 #include <type_traits>
 #include <vector>
@@ -15,6 +16,8 @@
 #include "core/hle/ipc.h"
 #include "core/hle/kernel/object.h"

+union ResultCode;
+
 namespace Service {
 class ServiceFrameworkBase;
 }
@@ -166,12 +169,12 @@ public:
        return buffer_c_desciptors;
    }

-    const IPC::DomainMessageHeader* GetDomainMessageHeader() const {
-        return domain_message_header.get();
+    const IPC::DomainMessageHeader& GetDomainMessageHeader() const {
+        return domain_message_header.value();
    }

    bool HasDomainMessageHeader() const {
-        return domain_message_header != nullptr;
+        return domain_message_header.has_value();
    }

    /// Helper function to read a buffer using the appropriate buffer descriptor
@@ -208,14 +211,12 @@ public:

    template <typename T>
    SharedPtr<T> GetCopyObject(std::size_t index) {
-        ASSERT(index < copy_objects.size());
-        return DynamicObjectCast<T>(copy_objects[index]);
+        return DynamicObjectCast<T>(copy_objects.at(index));
    }

    template <typename T>
    SharedPtr<T> GetMoveObject(std::size_t index) {
-        ASSERT(index < move_objects.size());
-        return DynamicObjectCast<T>(move_objects[index]);
+        return DynamicObjectCast<T>(move_objects.at(index));
    }

    void AddMoveObject(SharedPtr<Object> object) {
@@ -232,7 +233,7 @@ public:

    template <typename T>
    std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const {
-        return std::static_pointer_cast<T>(domain_request_handlers[index]);
+        return std::static_pointer_cast<T>(domain_request_handlers.at(index));
    }

    void SetDomainRequestHandlers(
@@ -272,10 +273,10 @@ private:
    boost::container::small_vector<SharedPtr<Object>, 8> copy_objects;
    boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects;

-    std::shared_ptr<IPC::CommandHeader> command_header;
-    std::shared_ptr<IPC::HandleDescriptorHeader> handle_descriptor_header;
-    std::shared_ptr<IPC::DataPayloadHeader> data_payload_header;
-    std::shared_ptr<IPC::DomainMessageHeader> domain_message_header;
+    std::optional<IPC::CommandHeader> command_header;
+    std::optional<IPC::HandleDescriptorHeader> handle_descriptor_header;
+    std::optional<IPC::DataPayloadHeader> data_payload_header;
+    std::optional<IPC::DomainMessageHeader> domain_message_header;
    std::vector<IPC::BufferDescriptorX> buffer_x_desciptors;
    std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors;
    std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors;
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -12,6 +12,7 @@

 #include "core/core.h"
 #include "core/core_timing.h"
+#include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/kernel.h"
@@ -86,11 +87,13 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
 }

 struct KernelCore::Impl {
-    void Initialize(KernelCore& kernel, Core::Timing::CoreTiming& core_timing) {
+    explicit Impl(Core::System& system) : system{system} {}
+
+    void Initialize(KernelCore& kernel) {
        Shutdown();

        InitializeSystemResourceLimit(kernel);
-        InitializeThreads(core_timing);
+        InitializeThreads();
    }

    void Shutdown() {
@@ -122,9 +125,9 @@ struct KernelCore::Impl {
        ASSERT(system_resource_limit->SetLimitValue(ResourceType::Sessions, 900).IsSuccess());
    }

-    void InitializeThreads(Core::Timing::CoreTiming& core_timing) {
+    void InitializeThreads() {
        thread_wakeup_event_type =
-            core_timing.RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
+            system.CoreTiming().RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
    }

    std::atomic<u32> next_object_id{0};
@@ -145,15 +148,18 @@ struct KernelCore::Impl {
    /// Map of named ports managed by the kernel, which can be retrieved using
    /// the ConnectToPort SVC.
    NamedPortTable named_ports;
+
+    // System context
+    Core::System& system;
 };

-KernelCore::KernelCore() : impl{std::make_unique<Impl>()} {}
+KernelCore::KernelCore(Core::System& system) : impl{std::make_unique<Impl>(system)} {}
 KernelCore::~KernelCore() {
    Shutdown();
 }

-void KernelCore::Initialize(Core::Timing::CoreTiming& core_timing) {
-    impl->Initialize(*this, core_timing);
+void KernelCore::Initialize() {
+    impl->Initialize(*this);
 }

 void KernelCore::Shutdown() {
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -11,6 +11,10 @@
 template <typename T>
 class ResultVal;

+namespace Core {
+class System;
+}
+
 namespace Core::Timing {
 class CoreTiming;
 struct EventType;
@@ -18,6 +22,7 @@ struct EventType;

 namespace Kernel {

+class AddressArbiter;
 class ClientPort;
 class HandleTable;
 class Process;
@@ -30,7 +35,14 @@ private:
    using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>;

 public:
-    KernelCore();
+    /// Constructs an instance of the kernel using the given System
+    /// instance as a context for any necessary system-related state,
+    /// such as threads, CPU core state, etc.
+    ///
+    /// @post After execution of the constructor, the provided System
+    ///       object *must* outlive the kernel instance itself.
+    ///
+    explicit KernelCore(Core::System& system);
    ~KernelCore();

    KernelCore(const KernelCore&) = delete;
@@ -40,11 +52,7 @@ public:
    KernelCore& operator=(KernelCore&&) = delete;

    /// Resets the kernel to a clean slate for use.
-    ///
-    /// @param core_timing CoreTiming instance used to create any necessary
-    ///                    kernel-specific callback events.
-    ///
-    void Initialize(Core::Timing::CoreTiming& core_timing);
+    void Initialize();

    /// Clears all resources in use by the kernel instance.
    void Shutdown();
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -31,7 +31,7 @@ namespace {
 */
 void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) {
    // Setup page table so we can write to memory
-    SetCurrentPageTable(&owner_process.VMManager().page_table);
+    Memory::SetCurrentPageTable(&owner_process.VMManager().page_table);

    // Initialize new "main" thread
    const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress();
@@ -53,9 +53,10 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_poi
 CodeSet::CodeSet() = default;
 CodeSet::~CodeSet() = default;

-SharedPtr<Process> Process::Create(KernelCore& kernel, std::string&& name) {
-    SharedPtr<Process> process(new Process(kernel));
+SharedPtr<Process> Process::Create(Core::System& system, std::string&& name) {
+    auto& kernel = system.Kernel();

+    SharedPtr<Process> process(new Process(system));
    process->name = std::move(name);
    process->resource_limit = kernel.GetSystemResourceLimit();
    process->status = ProcessStatus::Created;
@@ -132,7 +133,7 @@ void Process::PrepareForTermination() {
            if (thread->GetOwnerProcess() != this)
                continue;

-            if (thread == GetCurrentThread())
+            if (thread == system.CurrentScheduler().GetCurrentThread())
                continue;

            // TODO(Subv): When are the other running/ready threads terminated?
@@ -144,7 +145,6 @@ void Process::PrepareForTermination() {
        }
    };

-    const auto& system = Core::System::GetInstance();
    stop_threads(system.Scheduler(0).GetThreadList());
    stop_threads(system.Scheduler(1).GetThreadList());
    stop_threads(system.Scheduler(2).GetThreadList());
@@ -227,14 +227,12 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
    MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeMutable);

    // Clear instruction cache in CPU JIT
-    Core::System::GetInstance().ArmInterface(0).ClearInstructionCache();
-    Core::System::GetInstance().ArmInterface(1).ClearInstructionCache();
-    Core::System::GetInstance().ArmInterface(2).ClearInstructionCache();
-    Core::System::GetInstance().ArmInterface(3).ClearInstructionCache();
+    system.InvalidateCpuInstructionCaches();
 }

-Kernel::Process::Process(KernelCore& kernel) : WaitObject{kernel} {}
-Kernel::Process::~Process() {}
+Process::Process(Core::System& system)
+    : WaitObject{system.Kernel()}, address_arbiter{system}, system{system} {}
+Process::~Process() = default;

 void Process::Acquire(Thread* thread) {
    ASSERT_MSG(!ShouldWait(thread), "Object unavailable!");
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -12,12 +12,17 @@
 #include <vector>
 #include <boost/container/static_vector.hpp>
 #include "common/common_types.h"
+#include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/process_capability.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/hle/kernel/wait_object.h"
 #include "core/hle/result.h"

+namespace Core {
+class System;
+}
+
 namespace FileSys {
 class ProgramMetadata;
 }
@@ -116,7 +121,7 @@ public:

    static constexpr std::size_t RANDOM_ENTROPY_SIZE = 4;

-    static SharedPtr<Process> Create(KernelCore& kernel, std::string&& name);
+    static SharedPtr<Process> Create(Core::System& system, std::string&& name);

    std::string GetTypeName() const override {
        return "Process";
@@ -150,6 +155,16 @@ public:
        return handle_table;
    }

+    /// Gets a reference to the process' address arbiter.
+    AddressArbiter& GetAddressArbiter() {
+        return address_arbiter;
+    }
+
+    /// Gets a const reference to the process' address arbiter.
+    const AddressArbiter& GetAddressArbiter() const {
+        return address_arbiter;
+    }
+
    /// Gets the current status of the process
    ProcessStatus GetStatus() const {
        return status;
@@ -251,7 +266,7 @@ public:
    void FreeTLSSlot(VAddr tls_address);

 private:
-    explicit Process(KernelCore& kernel);
+    explicit Process(Core::System& system);
    ~Process() override;

    /// Checks if the specified thread should wait until this process is available.
@@ -309,9 +324,16 @@ private:
    /// Per-process handle table for storing created object handles in.
    HandleTable handle_table;

+    /// Per-process address arbiter.
+    AddressArbiter address_arbiter;
+
    /// Random values for svcGetInfo RandomEntropy
    std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy;

+    /// System context
+    Core::System& system;
+
+    /// Name of this process
    std::string name;
 };

--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -19,7 +19,8 @@ namespace Kernel {

 std::mutex Scheduler::scheduler_mutex;

-Scheduler::Scheduler(Core::ARM_Interface& cpu_core) : cpu_core(cpu_core) {}
+Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core)
+    : cpu_core{cpu_core}, system{system} {}

 Scheduler::~Scheduler() {
    for (auto& thread : thread_list) {
@@ -61,7 +62,7 @@ Thread* Scheduler::PopNextReadyThread() {

 void Scheduler::SwitchContext(Thread* new_thread) {
    Thread* const previous_thread = GetCurrentThread();
-    Process* const previous_process = Core::CurrentProcess();
+    Process* const previous_process = system.Kernel().CurrentProcess();

    UpdateLastContextSwitchTime(previous_thread, previous_process);

@@ -94,8 +95,8 @@ void Scheduler::SwitchContext(Thread* new_thread) {

        auto* const thread_owner_process = current_thread->GetOwnerProcess();
        if (previous_process != thread_owner_process) {
-            Core::System::GetInstance().Kernel().MakeCurrentProcess(thread_owner_process);
-            SetCurrentPageTable(&Core::CurrentProcess()->VMManager().page_table);
+            system.Kernel().MakeCurrentProcess(thread_owner_process);
+            Memory::SetCurrentPageTable(&thread_owner_process->VMManager().page_table);
        }

        cpu_core.LoadContext(new_thread->GetContext());
@@ -111,7 +112,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {

 void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
    const u64 prev_switch_ticks = last_context_switch_time;
-    const u64 most_recent_switch_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
+    const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks();
    const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;

    if (thread != nullptr) {
@@ -198,8 +199,7 @@ void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
    ASSERT(thread->GetPriority() < THREADPRIO_COUNT);

    // Yield this thread -- sleep for zero time and force reschedule to different thread
-    WaitCurrentThread_Sleep();
-    GetCurrentThread()->WakeAfterDelay(0);
+    GetCurrentThread()->Sleep(0);
 }

 void Scheduler::YieldWithLoadBalancing(Thread* thread) {
@@ -214,8 +214,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
    ASSERT(priority < THREADPRIO_COUNT);

    // Sleep for zero time to be able to force reschedule to different thread
-    WaitCurrentThread_Sleep();
-    GetCurrentThread()->WakeAfterDelay(0);
+    GetCurrentThread()->Sleep(0);

    Thread* suggested_thread = nullptr;

@@ -223,8 +222,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
    // Take the first non-nullptr one
    for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) {
        const auto res =
-            Core::System::GetInstance().CpuCore(cur_core).Scheduler().GetNextSuggestedThread(
-                core, priority);
+            system.CpuCore(cur_core).Scheduler().GetNextSuggestedThread(core, priority);

        // If scheduler provides a suggested thread
        if (res != nullptr) {
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -13,7 +13,8 @@

 namespace Core {
 class ARM_Interface;
-}
+class System;
+} // namespace Core

 namespace Kernel {

@@ -21,7 +22,7 @@ class Process;

 class Scheduler final {
 public:
-    explicit Scheduler(Core::ARM_Interface& cpu_core);
+    explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core);
    ~Scheduler();

    /// Returns whether there are any threads that are ready to run.
@@ -162,6 +163,7 @@ private:
    Core::ARM_Interface& cpu_core;
    u64 last_context_switch_time = 0;

+    Core::System& system;
    static std::mutex scheduler_mutex;
 };

--- a/src/core/hle/kernel/server_port.cpp
+++ b/src/core/hle/kernel/server_port.cpp
@@ -26,6 +26,10 @@ ResultVal<SharedPtr<ServerSession>> ServerPort::Accept() {
    return MakeResult(std::move(session));
 }

+void ServerPort::AppendPendingSession(SharedPtr<ServerSession> pending_session) {
+    pending_sessions.push_back(std::move(pending_session));
+}
+
 bool ServerPort::ShouldWait(Thread* thread) const {
    // If there are no pending sessions, we wait until a new one is added.
    return pending_sessions.empty();
--- a/src/core/hle/kernel/server_port.h
+++ b/src/core/hle/kernel/server_port.h
@@ -22,6 +22,8 @@ class SessionRequestHandler;

 class ServerPort final : public WaitObject {
 public:
+    using HLEHandler = std::shared_ptr<SessionRequestHandler>;
+
    /**
     * Creates a pair of ServerPort and an associated ClientPort.
     *
@@ -51,22 +53,27 @@ public:
     */
    ResultVal<SharedPtr<ServerSession>> Accept();

+    /// Whether or not this server port has an HLE handler available.
+    bool HasHLEHandler() const {
+        return hle_handler != nullptr;
+    }
+
+    /// Gets the HLE handler for this port.
+    HLEHandler GetHLEHandler() const {
+        return hle_handler;
+    }
+
    /**
     * Sets the HLE handler template for the port. ServerSessions crated by connecting to this port
     * will inherit a reference to this handler.
     */
-    void SetHleHandler(std::shared_ptr<SessionRequestHandler> hle_handler_) {
+    void SetHleHandler(HLEHandler hle_handler_) {
        hle_handler = std::move(hle_handler_);
    }

-    std::string name; ///< Name of port (optional)
-
-    /// ServerSessions waiting to be accepted by the port
-    std::vector<SharedPtr<ServerSession>> pending_sessions;
-
-    /// This session's HLE request handler template (optional)
-    /// ServerSessions created from this port inherit a reference to this handler.
-    std::shared_ptr<SessionRequestHandler> hle_handler;
+    /// Appends a ServerSession to the collection of ServerSessions
+    /// waiting to be accepted by this port.
+    void AppendPendingSession(SharedPtr<ServerSession> pending_session);

    bool ShouldWait(Thread* thread) const override;
    void Acquire(Thread* thread) override;
@@ -74,6 +81,16 @@ public:
 private:
    explicit ServerPort(KernelCore& kernel);
    ~ServerPort() override;
+
+    /// ServerSessions waiting to be accepted by the port
+    std::vector<SharedPtr<ServerSession>> pending_sessions;
+
+    /// This session's HLE request handler template (optional)
+    /// ServerSessions created from this port inherit a reference to this handler.
+    HLEHandler hle_handler;
+
+    /// Name of the port (optional)
+    std::string name;
 };

 } // namespace Kernel
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -63,42 +63,71 @@ void ServerSession::Acquire(Thread* thread) {
    pending_requesting_threads.pop_back();
 }

-ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) {
-    auto* const domain_message_header = context.GetDomainMessageHeader();
-    if (domain_message_header) {
-        // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs
-        context.SetDomainRequestHandlers(domain_request_handlers);
-
-        // If there is a DomainMessageHeader, then this is CommandType "Request"
-        const u32 object_id{context.GetDomainMessageHeader()->object_id};
-        switch (domain_message_header->command) {
-        case IPC::DomainMessageHeader::CommandType::SendMessage:
-            if (object_id > domain_request_handlers.size()) {
-                LOG_CRITICAL(IPC,
-                             "object_id {} is too big! This probably means a recent service call "
-                             "to {} needed to return a new interface!",
-                             object_id, name);
-                UNREACHABLE();
-                return RESULT_SUCCESS; // Ignore error if asserts are off
-            }
-            return domain_request_handlers[object_id - 1]->HandleSyncRequest(context);
-
-        case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
-            LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
-
-            domain_request_handlers[object_id - 1] = nullptr;
-
-            IPC::ResponseBuilder rb{context, 2};
-            rb.Push(RESULT_SUCCESS);
-            return RESULT_SUCCESS;
-        }
-        }
-
-        LOG_CRITICAL(IPC, "Unknown domain command={}",
-                     static_cast<int>(domain_message_header->command.Value()));
-        ASSERT(false);
+void ServerSession::ClientDisconnected() {
+    // Take our own reference to the HLE handler before notifying it: the
+    // notification below resets this session's hle_handler member, so this
+    // local copy is what keeps the handler object alive until we return.
+    const std::shared_ptr<SessionRequestHandler> keep_alive{hle_handler};
+    if (keep_alive != nullptr) {
+        // Once this call returns, hle_handler has been invalidated (set to null).
+        keep_alive->ClientDisconnected(this);
+    }
+
+    // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
+    // their WaitSynchronization result to 0xC920181A.
+
+    // The client endpoint is closed, so the client threads with pending requests
+    // are no longer needed; drop them along with the thread being handled.
+    pending_requesting_threads.clear();
+    currently_handling = nullptr;
+}
+
+void ServerSession::AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler) {
+    // The handler is taken by value, so moving it into the list avoids a second copy.
+    domain_request_handlers.emplace_back(std::move(handler));
+}
+
+std::size_t ServerSession::NumDomainRequestHandlers() const {
+    return domain_request_handlers.size();
+}
+
+ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) {
+    // Requests that carry no domain message header need no domain-level handling.
+    if (!context.HasDomainMessageHeader()) {
+        return RESULT_SUCCESS;
+    }
+
+    // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs
+    context.SetDomainRequestHandlers(domain_request_handlers);
+
+    // If there is a DomainMessageHeader, then this is CommandType "Request"
+    const auto& domain_message_header = context.GetDomainMessageHeader();
+    const u32 object_id{domain_message_header.object_id};
+
+    // Object IDs are 1-based indices into domain_request_handlers, so 0 is never valid.
+    // Validate before indexing: object_id is read from the request message, and
+    // "object_id - 1" would otherwise wrap around or run past the end of the vector.
+    const bool object_id_in_range =
+        object_id != 0 && object_id <= domain_request_handlers.size();
+
+    switch (domain_message_header.command) {
+    case IPC::DomainMessageHeader::CommandType::SendMessage:
+        if (object_id > domain_request_handlers.size()) {
+            LOG_CRITICAL(IPC,
+                         "object_id {} is too big! This probably means a recent service call "
+                         "to {} needed to return a new interface!",
+                         object_id, name);
+            UNREACHABLE();
+            return RESULT_SUCCESS; // Ignore error if asserts are off
+        }
+        // Also rejects slots that CloseVirtualHandle has already reset to null.
+        if (!object_id_in_range || domain_request_handlers[object_id - 1] == nullptr) {
+            LOG_CRITICAL(IPC, "object_id {} does not refer to an open domain object!",
+                         object_id);
+            UNREACHABLE();
+            return RESULT_SUCCESS; // Ignore error if asserts are off
+        }
+        return domain_request_handlers[object_id - 1]->HandleSyncRequest(context);
+
+    case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
+        LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
+
+        if (!object_id_in_range) {
+            LOG_CRITICAL(IPC, "Cannot close virtual handle, object_id {} is out of range!",
+                         object_id);
+            UNREACHABLE();
+            return RESULT_SUCCESS; // Ignore error if asserts are off
+        }
+
+        // The slot is reset rather than erased, so other object IDs keep their indices.
+        domain_request_handlers[object_id - 1] = nullptr;
+
+        IPC::ResponseBuilder rb{context, 2};
+        rb.Push(RESULT_SUCCESS);
+        return RESULT_SUCCESS;
+    }
+    }
+
+    LOG_CRITICAL(IPC, "Unknown domain command={}",
+                 static_cast<int>(domain_message_header.command.Value()));
+    ASSERT(false);
    return RESULT_SUCCESS;
 }

--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -46,6 +46,14 @@ public:
        return HANDLE_TYPE;
    }

+    Session* GetParent() {
+        return parent.get();
+    }
+
+    const Session* GetParent() const {
+        return parent.get();
+    }
+
    using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>;

    /**
@@ -78,23 +86,16 @@ public:

    void Acquire(Thread* thread) override;

-    std::string name;                ///< The name of this session (optional)
-    std::shared_ptr<Session> parent; ///< The parent session, which links to the client endpoint.
-    std::shared_ptr<SessionRequestHandler>
-        hle_handler; ///< This session's HLE request handler (applicable when not a domain)
+    /// Called when a client disconnection occurs.
+    void ClientDisconnected();

-    /// This is the list of domain request handlers (after conversion to a domain)
-    std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers;
+    /// Adds a new domain request handler to the collection of request handlers within
+    /// this ServerSession instance.
+    void AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler);

-    /// List of threads that are pending a response after a sync request. This list is processed in
-    /// a LIFO manner, thus, the last request will be dispatched first.
-    /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
-    std::vector<SharedPtr<Thread>> pending_requesting_threads;
-
-    /// Thread whose request is currently being handled. A request is considered "handled" when a
-    /// response is sent via svcReplyAndReceive.
-    /// TODO(Subv): Find a better name for this.
-    SharedPtr<Thread> currently_handling;
+    /// Retrieves the total number of domain request handlers that have been
+    /// appended to this ServerSession instance.
+    std::size_t NumDomainRequestHandlers() const;

    /// Returns true if the session has been converted to a domain, otherwise False
    bool IsDomain() const {
@@ -129,8 +130,30 @@ private:
    /// object handle.
    ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context);

+    /// The parent session, which links to the client endpoint.
+    std::shared_ptr<Session> parent;
+
+    /// This session's HLE request handler (applicable when not a domain)
+    std::shared_ptr<SessionRequestHandler> hle_handler;
+
+    /// This is the list of domain request handlers (after conversion to a domain)
+    std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers;
+
+    /// List of threads that are pending a response after a sync request. This list is processed in
+    /// a LIFO manner, thus, the last request will be dispatched first.
+    /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
+    std::vector<SharedPtr<Thread>> pending_requesting_threads;
+
+    /// Thread whose request is currently being handled. A request is considered "handled" when a
+    /// response is sent via svcReplyAndReceive.
+    /// TODO(Subv): Find a better name for this.
+    SharedPtr<Thread> currently_handling;
+
    /// When set to True, converts the session to a domain at the end of the command
    bool convert_to_domain{};
+
+    /// The name of this session (optional)
+    std::string name;
 };

 } // namespace Kernel
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -6,7 +6,6 @@

 #include "common/assert.h"
 #include "common/logging/log.h"
-#include "core/core.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/shared_memory.h"
@@ -34,8 +33,8 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
        shared_memory->backing_block_offset = 0;

        // Refresh the address mappings for the current process.
-        if (Core::CurrentProcess() != nullptr) {
-            Core::CurrentProcess()->VMManager().RefreshMemoryBlockMappings(
+        if (kernel.CurrentProcess() != nullptr) {
+            kernel.CurrentProcess()->VMManager().RefreshMemoryBlockMappings(
                shared_memory->backing_block.get());
        }
    } else {
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -20,6 +20,7 @@
 #include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
+#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/mutex.h"
@@ -47,23 +48,6 @@ constexpr bool IsValidAddressRange(VAddr address, u64 size) {
    return address + size > address;
 }

-// Checks if a given address range lies within a larger address range.
-constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
-                                    VAddr address_range_end) {
-    const VAddr end_address = address + size - 1;
-    return address_range_begin <= address && end_address <= address_range_end - 1;
-}
-
-bool IsInsideAddressSpace(const VMManager& vm, VAddr address, u64 size) {
-    return IsInsideAddressRange(address, size, vm.GetAddressSpaceBaseAddress(),
-                                vm.GetAddressSpaceEndAddress());
-}
-
-bool IsInsideNewMapRegion(const VMManager& vm, VAddr address, u64 size) {
-    return IsInsideAddressRange(address, size, vm.GetNewMapRegionBaseAddress(),
-                                vm.GetNewMapRegionEndAddress());
-}
-
 // 8 GiB
 constexpr u64 MAIN_MEMORY_SIZE = 0x200000000;

@@ -105,14 +89,14 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add
        return ERR_INVALID_ADDRESS_STATE;
    }

-    if (!IsInsideAddressSpace(vm_manager, src_addr, size)) {
+    if (!vm_manager.IsWithinAddressSpace(src_addr, size)) {
        LOG_ERROR(Kernel_SVC,
                  "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}",
                  src_addr, size);
        return ERR_INVALID_ADDRESS_STATE;
    }

-    if (!IsInsideNewMapRegion(vm_manager, dst_addr, size)) {
+    if (!vm_manager.IsWithinNewMapRegion(dst_addr, size)) {
        LOG_ERROR(Kernel_SVC,
                  "Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}",
                  dst_addr, size);
@@ -238,7 +222,7 @@ static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) {
    auto* const current_process = Core::CurrentProcess();
    auto& vm_manager = current_process->VMManager();

-    if (!IsInsideAddressSpace(vm_manager, addr, size)) {
+    if (!vm_manager.IsWithinAddressSpace(addr, size)) {
        LOG_ERROR(Kernel_SVC,
                  "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
                  size);
@@ -299,7 +283,7 @@ static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attr
    }

    auto& vm_manager = Core::CurrentProcess()->VMManager();
-    if (!IsInsideAddressSpace(vm_manager, address, size)) {
+    if (!vm_manager.IsWithinAddressSpace(address, size)) {
        LOG_ERROR(Kernel_SVC,
                  "Given address (0x{:016X}) is outside the bounds of the address space.", address);
        return ERR_INVALID_ADDRESS_STATE;
@@ -1300,10 +1284,14 @@ static ResultCode StartThread(Handle thread_handle) {

 /// Called when a thread exits
 static void ExitThread() {
-    LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", Core::CurrentArmInterface().GetPC());
+    auto& system = Core::System::GetInstance();

-    ExitCurrentThread();
-    Core::System::GetInstance().PrepareReschedule();
+    LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
+
+    auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
+    current_thread->Stop();
+    system.CurrentScheduler().RemoveThread(current_thread);
+    system.PrepareReschedule();
 }

 /// Sleep the current thread
@@ -1316,32 +1304,32 @@ static void SleepThread(s64 nanoseconds) {
        YieldAndWaitForLoadBalancing = -2,
    };

+    auto& system = Core::System::GetInstance();
+    auto& scheduler = system.CurrentScheduler();
+    auto* const current_thread = scheduler.GetCurrentThread();
+
    if (nanoseconds <= 0) {
-        auto& scheduler{Core::System::GetInstance().CurrentScheduler()};
        switch (static_cast<SleepType>(nanoseconds)) {
        case SleepType::YieldWithoutLoadBalancing:
-            scheduler.YieldWithoutLoadBalancing(GetCurrentThread());
+            scheduler.YieldWithoutLoadBalancing(current_thread);
            break;
        case SleepType::YieldWithLoadBalancing:
-            scheduler.YieldWithLoadBalancing(GetCurrentThread());
+            scheduler.YieldWithLoadBalancing(current_thread);
            break;
        case SleepType::YieldAndWaitForLoadBalancing:
-            scheduler.YieldAndWaitForLoadBalancing(GetCurrentThread());
+            scheduler.YieldAndWaitForLoadBalancing(current_thread);
            break;
        default:
            UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds);
        }
    } else {
-        // Sleep current thread and check for next thread to schedule
-        WaitCurrentThread_Sleep();
-
-        // Create an event to wake the thread up after the specified nanosecond delay has passed
-        GetCurrentThread()->WakeAfterDelay(nanoseconds);
+        current_thread->Sleep(nanoseconds);
    }

    // Reschedule all CPU cores
-    for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i)
-        Core::System::GetInstance().CpuCore(i).PrepareReschedule();
+    for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i) {
+        system.CpuCore(i).PrepareReschedule();
+    }
 }

 /// Wait process wide key atomic
@@ -1495,20 +1483,10 @@ static ResultCode WaitForAddress(VAddr address, u32 type, s32 value, s64 timeout
        return ERR_INVALID_ADDRESS;
    }

-    switch (static_cast<AddressArbiter::ArbitrationType>(type)) {
-    case AddressArbiter::ArbitrationType::WaitIfLessThan:
-        return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, false);
-    case AddressArbiter::ArbitrationType::DecrementAndWaitIfLessThan:
-        return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, true);
-    case AddressArbiter::ArbitrationType::WaitIfEqual:
-        return AddressArbiter::WaitForAddressIfEqual(address, value, timeout);
-    default:
-        LOG_ERROR(Kernel_SVC,
-                  "Invalid arbitration type, expected WaitIfLessThan, DecrementAndWaitIfLessThan "
-                  "or WaitIfEqual but got {}",
-                  type);
-        return ERR_INVALID_ENUM_VALUE;
-    }
+    const auto arbitration_type = static_cast<AddressArbiter::ArbitrationType>(type);
+    auto& address_arbiter =
+        Core::System::GetInstance().Kernel().CurrentProcess()->GetAddressArbiter();
+    return address_arbiter.WaitForAddress(address, arbitration_type, value, timeout);
 }

 // Signals to an address (via Address Arbiter)
@@ -1526,21 +1504,10 @@ static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to
        return ERR_INVALID_ADDRESS;
    }

-    switch (static_cast<AddressArbiter::SignalType>(type)) {
-    case AddressArbiter::SignalType::Signal:
-        return AddressArbiter::SignalToAddress(address, num_to_wake);
-    case AddressArbiter::SignalType::IncrementAndSignalIfEqual:
-        return AddressArbiter::IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
-    case AddressArbiter::SignalType::ModifyByWaitingCountAndSignalIfEqual:
-        return AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(address, value,
-                                                                             num_to_wake);
-    default:
-        LOG_ERROR(Kernel_SVC,
-                  "Invalid signal type, expected Signal, IncrementAndSignalIfEqual "
-                  "or ModifyByWaitingCountAndSignalIfEqual but got {}",
-                  type);
-        return ERR_INVALID_ENUM_VALUE;
-    }
+    const auto signal_type = static_cast<AddressArbiter::SignalType>(type);
+    auto& address_arbiter =
+        Core::System::GetInstance().Kernel().CurrentProcess()->GetAddressArbiter();
+    return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake);
 }

 /// This returns the total CPU ticks elapsed since the CPU was powered-on
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -68,11 +68,6 @@ void Thread::Stop() {
    owner_process->FreeTLSSlot(tls_address);
 }

-void WaitCurrentThread_Sleep() {
-    Thread* thread = GetCurrentThread();
-    thread->SetStatus(ThreadStatus::WaitSleep);
-}
-
 void ExitCurrentThread() {
    Thread* thread = GetCurrentThread();
    thread->Stop();
@@ -184,8 +179,6 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
        return ERR_INVALID_PROCESSOR_ID;
    }

-    // TODO(yuriks): Other checks, returning 0xD9001BEA
-
    if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) {
        LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point);
        // TODO (bunnei): Find the correct error code to use here
@@ -393,6 +386,14 @@ void Thread::SetActivity(ThreadActivity value) {
    }
 }

+void Thread::Sleep(s64 nanoseconds) {
+    // Put this thread into the WaitSleep state before arming the wake-up.
+    this->SetStatus(ThreadStatus::WaitSleep);
+
+    // Arrange for the thread to be woken once the requested delay has passed.
+    this->WakeAfterDelay(nanoseconds);
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////

 /**
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -383,6 +383,9 @@ public:

    void SetActivity(ThreadActivity value);

+    /// Sleeps this thread for the given amount of nanoseconds.
+    void Sleep(s64 nanoseconds);
+
 private:
    explicit Thread(KernelCore& kernel);
    ~Thread() override;
@@ -460,14 +463,4 @@ private:
 */
 Thread* GetCurrentThread();

-/**
- * Waits the current thread on a sleep
- */
-void WaitCurrentThread_Sleep();
-
-/**
- * Stops the current thread and removes it from the thread_list
- */
-void ExitCurrentThread();
-
 } // namespace Kernel
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -7,18 +7,18 @@
 #include <utility>
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "common/memory_hook.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
 #include "core/file_sys/program_metadata.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/memory.h"
-#include "core/memory_hook.h"
 #include "core/memory_setup.h"

 namespace Kernel {
-
-static const char* GetMemoryStateName(MemoryState state) {
+namespace {
+const char* GetMemoryStateName(MemoryState state) {
    static constexpr const char* names[] = {
        "Unmapped",         "Io",
        "Normal",           "CodeStatic",
@@ -35,6 +35,14 @@ static const char* GetMemoryStateName(MemoryState state) {
    return names[ToSvcMemoryState(state)];
 }

+// Checks if a given address range lies within a larger address range.
+//
+// The tested range is [address, address + size) and the enclosing range is
+// [address_range_begin, address_range_end). A non-empty range whose last byte wraps
+// around the top of the address space is never considered inside.
+constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
+                                    VAddr address_range_end) {
+    const VAddr end_address = address + size - 1;
+
+    // Unsigned arithmetic wraps silently: without this check an overflowing range ends
+    // at a small address and would pass the upper-bound comparison below.
+    if (size != 0 && end_address < address) {
+        return false;
+    }
+
+    return address_range_begin <= address && end_address <= address_range_end - 1;
+}
+} // Anonymous namespace
+
 bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
    ASSERT(base + size == next.base);
    if (permissions != next.permissions || state != next.state || attribute != next.attribute ||
@@ -169,7 +177,7 @@ ResultVal<VAddr> VMManager::FindFreeRegion(u64 size) const {

 ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u64 size,
                                                   MemoryState state,
-                                                   Memory::MemoryHookPointer mmio_handler) {
+                                                   Common::MemoryHookPointer mmio_handler) {
    // This is the appropriately sized VMA that will turn into our allocation.
    CASCADE_RESULT(VMAIter vma_handle, CarveVMA(target, size));
    VirtualMemoryArea& final_vma = vma_handle->second;
@@ -249,8 +257,7 @@ ResultCode VMManager::ReprotectRange(VAddr target, u64 size, VMAPermission new_p
 }

 ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission perms) {
-    if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() ||
-        target + size < target) {
+    if (!IsWithinHeapRegion(target, size)) {
        return ERR_INVALID_ADDRESS;
    }

@@ -285,8 +292,7 @@ ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission p
 }

 ResultCode VMManager::HeapFree(VAddr target, u64 size) {
-    if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() ||
-        target + size < target) {
+    if (!IsWithinHeapRegion(target, size)) {
        return ERR_INVALID_ADDRESS;
    }

@@ -618,7 +624,7 @@ void VMManager::ClearPageTable() {
    std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
    page_table.special_regions.clear();
    std::fill(page_table.attributes.begin(), page_table.attributes.end(),
-              Memory::PageType::Unmapped);
+              Common::PageType::Unmapped);
 }

 VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, MemoryState state_mask,
@@ -706,6 +712,11 @@ u64 VMManager::GetAddressSpaceWidth() const {
    return address_space_width;
 }

+bool VMManager::IsWithinAddressSpace(VAddr address, u64 size) const {
+    // Bounds of the whole address space managed by this VMManager.
+    const VAddr region_begin = GetAddressSpaceBaseAddress();
+    const VAddr region_end = GetAddressSpaceEndAddress();
+    return IsInsideAddressRange(address, size, region_begin, region_end);
+}
+
 VAddr VMManager::GetASLRRegionBaseAddress() const {
    return aslr_region_base;
 }
@@ -750,6 +761,11 @@ u64 VMManager::GetCodeRegionSize() const {
    return code_region_end - code_region_base;
 }

+bool VMManager::IsWithinCodeRegion(VAddr address, u64 size) const {
+    // Bounds of the code region.
+    const VAddr region_begin = GetCodeRegionBaseAddress();
+    const VAddr region_end = GetCodeRegionEndAddress();
+    return IsInsideAddressRange(address, size, region_begin, region_end);
+}
+
 VAddr VMManager::GetHeapRegionBaseAddress() const {
    return heap_region_base;
 }
@@ -762,6 +778,11 @@ u64 VMManager::GetHeapRegionSize() const {
    return heap_region_end - heap_region_base;
 }

+bool VMManager::IsWithinHeapRegion(VAddr address, u64 size) const {
+    // Bounds of the heap region.
+    const VAddr region_begin = GetHeapRegionBaseAddress();
+    const VAddr region_end = GetHeapRegionEndAddress();
+    return IsInsideAddressRange(address, size, region_begin, region_end);
+}
+
 VAddr VMManager::GetMapRegionBaseAddress() const {
    return map_region_base;
 }
@@ -774,6 +795,10 @@ u64 VMManager::GetMapRegionSize() const {
    return map_region_end - map_region_base;
 }

+bool VMManager::IsWithinMapRegion(VAddr address, u64 size) const {
+    // Bounds of the map region.
+    const VAddr region_begin = GetMapRegionBaseAddress();
+    const VAddr region_end = GetMapRegionEndAddress();
+    return IsInsideAddressRange(address, size, region_begin, region_end);
+}
+
 VAddr VMManager::GetNewMapRegionBaseAddress() const {
    return new_map_region_base;
 }
@@ -786,6 +811,11 @@ u64 VMManager::GetNewMapRegionSize() const {
    return new_map_region_end - new_map_region_base;
 }

+bool VMManager::IsWithinNewMapRegion(VAddr address, u64 size) const {
+    // Bounds of the new map region.
+    const VAddr region_begin = GetNewMapRegionBaseAddress();
+    const VAddr region_end = GetNewMapRegionEndAddress();
+    return IsInsideAddressRange(address, size, region_begin, region_end);
+}
+
 VAddr VMManager::GetTLSIORegionBaseAddress() const {
    return tls_io_region_base;
 }
@@ -798,4 +828,9 @@ u64 VMManager::GetTLSIORegionSize() const {
    return tls_io_region_end - tls_io_region_base;
 }

+bool VMManager::IsWithinTLSIORegion(VAddr address, u64 size) const {
+    // Bounds of the TLS IO region.
+    const VAddr region_begin = GetTLSIORegionBaseAddress();
+    const VAddr region_end = GetTLSIORegionEndAddress();
+    return IsInsideAddressRange(address, size, region_begin, region_end);
+}
+
 } // namespace Kernel
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -9,9 +9,10 @@
 #include <tuple>
 #include <vector>
 #include "common/common_types.h"
+#include "common/memory_hook.h"
+#include "common/page_table.h"
 #include "core/hle/result.h"
 #include "core/memory.h"
-#include "core/memory_hook.h"

 namespace FileSys {
 enum class ProgramAddressSpaceType : u8;
@@ -290,7 +291,7 @@ struct VirtualMemoryArea {
    // Settings for type = MMIO
    /// Physical address of the register area this VMA maps to.
    PAddr paddr = 0;
-    Memory::MemoryHookPointer mmio_handler = nullptr;
+    Common::MemoryHookPointer mmio_handler = nullptr;

    /// Tests if this area can be merged to the right with `next`.
    bool CanBeMergedWith(const VirtualMemoryArea& next) const;
@@ -368,7 +369,7 @@ public:
     * @param mmio_handler The handler that will implement read and write for this MMIO region.
     */
    ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u64 size, MemoryState state,
-                                 Memory::MemoryHookPointer mmio_handler);
+                                 Common::MemoryHookPointer mmio_handler);

    /// Unmaps a range of addresses, splitting VMAs as necessary.
    ResultCode UnmapRange(VAddr target, u64 size);
@@ -432,18 +433,21 @@ public:
    /// Gets the address space width in bits.
    u64 GetAddressSpaceWidth() const;

+    /// Determines whether or not the given address range lies within the address space.
+    bool IsWithinAddressSpace(VAddr address, u64 size) const;
+
    /// Gets the base address of the ASLR region.
    VAddr GetASLRRegionBaseAddress() const;

    /// Gets the end address of the ASLR region.
    VAddr GetASLRRegionEndAddress() const;

-    /// Determines whether or not the specified address range is within the ASLR region.
-    bool IsWithinASLRRegion(VAddr address, u64 size) const;
-
    /// Gets the size of the ASLR region
    u64 GetASLRRegionSize() const;

+    /// Determines whether or not the specified address range is within the ASLR region.
+    bool IsWithinASLRRegion(VAddr address, u64 size) const;
+
    /// Gets the base address of the code region.
    VAddr GetCodeRegionBaseAddress() const;

@@ -453,6 +457,9 @@ public:
    /// Gets the total size of the code region in bytes.
    u64 GetCodeRegionSize() const;

+    /// Determines whether or not the specified range is within the code region.
+    bool IsWithinCodeRegion(VAddr address, u64 size) const;
+
    /// Gets the base address of the heap region.
    VAddr GetHeapRegionBaseAddress() const;

@@ -462,6 +469,9 @@ public:
    /// Gets the total size of the heap region in bytes.
    u64 GetHeapRegionSize() const;

+    /// Determines whether or not the specified range is within the heap region.
+    bool IsWithinHeapRegion(VAddr address, u64 size) const;
+
    /// Gets the base address of the map region.
    VAddr GetMapRegionBaseAddress() const;

@@ -471,6 +481,9 @@ public:
    /// Gets the total size of the map region in bytes.
    u64 GetMapRegionSize() const;

+    /// Determines whether or not the specified range is within the map region.
+    bool IsWithinMapRegion(VAddr address, u64 size) const;
+
    /// Gets the base address of the new map region.
    VAddr GetNewMapRegionBaseAddress() const;

@@ -480,6 +493,9 @@ public:
    /// Gets the total size of the new map region in bytes.
    u64 GetNewMapRegionSize() const;

+    /// Determines whether or not the given address range is within the new map region
+    bool IsWithinNewMapRegion(VAddr address, u64 size) const;
+
    /// Gets the base address of the TLS IO region.
    VAddr GetTLSIORegionBaseAddress() const;

@@ -489,9 +505,12 @@ public:
    /// Gets the total size of the TLS IO region in bytes.
    u64 GetTLSIORegionSize() const;

+    /// Determines if the given address range is within the TLS IO region.
+    bool IsWithinTLSIORegion(VAddr address, u64 size) const;
+
    /// Each VMManager has its own page table, which is set as the main one when the owning process
    /// is scheduled.
-    Memory::PageTable page_table;
+    Common::PageTable page_table{Memory::PAGE_BITS};

 private:
    using VMAIter = VMAMap::iterator;
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -8,19 +8,10 @@
 #include <utility>
 #include "common/assert.h"
 #include "common/bit_field.h"
-#include "common/common_funcs.h"
 #include "common/common_types.h"

 // All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes

-/**
- * Detailed description of the error. Code 0 always means success.
- */
-enum class ErrorDescription : u32 {
-    Success = 0,
-    RemoteProcessDead = 301,
-};
-
 /**
 * Identifies the module which caused the error. Error codes can be propagated through a call
 * chain, meaning that this doesn't always correspond to the module where the API call made is
@@ -121,7 +112,7 @@ enum class ErrorModule : u32 {
    ShopN = 811,
 };

-/// Encapsulates a CTR-OS error code, allowing it to be separated into its constituent fields.
+/// Encapsulates a Horizon OS error code, allowing it to be separated into its constituent fields.
 union ResultCode {
    u32 raw;

@@ -134,17 +125,9 @@ union ResultCode {

    constexpr explicit ResultCode(u32 raw) : raw(raw) {}

-    constexpr ResultCode(ErrorModule module, ErrorDescription description)
-        : ResultCode(module, static_cast<u32>(description)) {}
-
    constexpr ResultCode(ErrorModule module_, u32 description_)
        : raw(module.FormatValue(module_) | description.FormatValue(description_)) {}

-    constexpr ResultCode& operator=(const ResultCode& o) {
-        raw = o.raw;
-        return *this;
-    }
-
    constexpr bool IsSuccess() const {
        return raw == 0;
    }
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -7,6 +7,7 @@
 #include "common/string_util.h"
 #include "core/core.h"
 #include "core/frontend/applets/software_keyboard.h"
+#include "core/hle/result.h"
 #include "core/hle/service/am/am.h"
 #include "core/hle/service/am/applets/software_keyboard.h"

--- a/src/core/hle/service/am/applets/software_keyboard.h
+++ b/src/core/hle/service/am/applets/software_keyboard.h
@@ -9,10 +9,13 @@
 #include <vector>

 #include "common/common_funcs.h"
+#include "common/common_types.h"
 #include "common/swap.h"
 #include "core/hle/service/am/am.h"
 #include "core/hle/service/am/applets/applets.h"

+union ResultCode;
+
 namespace Service::AM::Applets {

 enum class KeysetDisable : u32 {
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -18,17 +18,11 @@
 #include "core/hle/kernel/readable_event.h"
 #include "core/hle/kernel/writable_event.h"
 #include "core/hle/service/audio/audout_u.h"
+#include "core/hle/service/audio/errors.h"
 #include "core/memory.h"

 namespace Service::Audio {

-namespace ErrCodes {
-enum {
-    ErrorUnknown = 2,
-    BufferCountExceeded = 8,
-};
-}
-
 constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}};
 constexpr int DefaultSampleRate{48000};

@@ -100,7 +94,7 @@ private:

        if (stream->IsPlaying()) {
            IPC::ResponseBuilder rb{ctx, 2};
-            rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::ErrorUnknown));
+            rb.Push(ERR_OPERATION_FAILED);
            return;
        }

@@ -113,7 +107,9 @@ private:
    void StopAudioOut(Kernel::HLERequestContext& ctx) {
        LOG_DEBUG(Service_Audio, "called");

-        audio_core.StopStream(stream);
+        if (stream->IsPlaying()) {
+            audio_core.StopStream(stream);
+        }

        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(RESULT_SUCCESS);
@@ -143,7 +139,8 @@ private:

        if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) {
            IPC::ResponseBuilder rb{ctx, 2};
-            rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::BufferCountExceeded));
+            rb.Push(ERR_BUFFER_COUNT_EXCEEDED);
+            return;
        }

        IPC::ResponseBuilder rb{ctx, 2};
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -17,6 +17,7 @@
 #include "core/hle/kernel/readable_event.h"
 #include "core/hle/kernel/writable_event.h"
 #include "core/hle/service/audio/audren_u.h"
+#include "core/hle/service/audio/errors.h"

 namespace Service::Audio {

@@ -146,7 +147,7 @@ private:
        // code in this case.

        IPC::ResponseBuilder rb{ctx, 2};
-        rb.Push(ResultCode{ErrorModule::Audio, 201});
+        rb.Push(ERR_NOT_SUPPORTED);
    }

    Kernel::EventPair system_event;
--- a/src/core/hle/service/audio/errors.h
+++ b/src/core/hle/service/audio/errors.h
@@ -0,0 +1,15 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "core/hle/result.h"
+
+namespace Service::Audio {
+
+constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::Audio, 2};
+constexpr ResultCode ERR_BUFFER_COUNT_EXCEEDED{ErrorModule::Audio, 8};
+constexpr ResultCode ERR_NOT_SUPPORTED{ErrorModule::Audio, 513};
+
+} // namespace Service::Audio
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -9,43 +9,32 @@

 #include <opus.h>

-#include "common/common_funcs.h"
+#include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/service/audio/hwopus.h"

 namespace Service::Audio {
-
+namespace {
 struct OpusDeleter {
    void operator()(void* ptr) const {
        operator delete(ptr);
    }
 };

-class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
+using OpusDecoderPtr = std::unique_ptr<OpusDecoder, OpusDeleter>;
+
+struct OpusPacketHeader {
+    // Packet size in bytes.
+    u32_be size;
+    // Indicates the final range of the codec's entropy coder.
+    u32_be final_range;
+};
+static_assert(sizeof(OpusPacketHeader) == 0x8, "OpusHeader is an invalid size");
+
+class OpusDecoderStateBase {
 public:
-    IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoder, OpusDeleter> decoder, u32 sample_rate,
-                                u32 channel_count)
-        : ServiceFramework("IHardwareOpusDecoderManager"), decoder(std::move(decoder)),
-          sample_rate(sample_rate), channel_count(channel_count) {
-        // clang-format off
-        static const FunctionInfo functions[] = {
-            {0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
-            {1, nullptr, "SetContext"},
-            {2, nullptr, "DecodeInterleavedForMultiStreamOld"},
-            {3, nullptr, "SetContextForMultiStream"},
-            {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
-            {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
-            {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
-            {7, nullptr, "DecodeInterleavedForMultiStream"},
-        };
-        // clang-format on
-
-        RegisterHandlers(functions);
-    }
-
-private:
    /// Describes extra behavior that may be asked of the decoding context.
    enum class ExtraBehavior {
        /// No extra behavior.
@@ -55,30 +44,36 @@ private:
        ResetContext,
    };

-    void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) {
-        LOG_DEBUG(Audio, "called");
+    enum class PerfTime {
+        Disabled,
+        Enabled,
+    };

-        DecodeInterleavedHelper(ctx, nullptr, ExtraBehavior::None);
-    }
-
-    void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
-        LOG_DEBUG(Audio, "called");
-
-        u64 performance = 0;
-        DecodeInterleavedHelper(ctx, &performance, ExtraBehavior::None);
-    }
-
-    void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
-        LOG_DEBUG(Audio, "called");
-
-        IPC::RequestParser rp{ctx};
-        const auto extra_behavior =
-            rp.Pop<bool>() ? ExtraBehavior::ResetContext : ExtraBehavior::None;
-
-        u64 performance = 0;
-        DecodeInterleavedHelper(ctx, &performance, extra_behavior);
+    virtual ~OpusDecoderStateBase() = default;
+
+    // Decodes interleaved Opus packets. Optionally allows reporting time taken to
+    // perform the decoding, as well as any relevant extra behavior.
+    virtual void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
+                                   ExtraBehavior extra_behavior) = 0;
+};
+
+// Represents the decoder state for a non-multistream decoder.
+class OpusDecoderState final : public OpusDecoderStateBase {
+public:
+    explicit OpusDecoderState(OpusDecoderPtr decoder, u32 sample_rate, u32 channel_count)
+        : decoder{std::move(decoder)}, sample_rate{sample_rate}, channel_count{channel_count} {}
+
+    void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
+                           ExtraBehavior extra_behavior) override {
+        if (perf_time == PerfTime::Disabled) {
+            DecodeInterleavedHelper(ctx, nullptr, extra_behavior);
+        } else {
+            u64 performance = 0;
+            DecodeInterleavedHelper(ctx, &performance, extra_behavior);
+        }
    }

+private:
    void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance,
                                 ExtraBehavior extra_behavior) {
        u32 consumed = 0;
@@ -89,8 +84,7 @@ private:
            ResetDecoderContext();
        }

-        if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples,
-                                       performance)) {
+        if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) {
            LOG_ERROR(Audio, "Failed to decode opus data");
            IPC::ResponseBuilder rb{ctx, 2};
            // TODO(ogniK): Use correct error code
@@ -109,27 +103,27 @@ private:
        ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
    }

-    bool Decoder_DecodeInterleaved(u32& consumed, u32& sample_count, const std::vector<u8>& input,
-                                   std::vector<opus_int16>& output, u64* out_performance_time) {
+    bool DecodeOpusData(u32& consumed, u32& sample_count, const std::vector<u8>& input,
+                        std::vector<opus_int16>& output, u64* out_performance_time) const {
        const auto start_time = std::chrono::high_resolution_clock::now();
        const std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
-        if (sizeof(OpusHeader) > input.size()) {
+        if (sizeof(OpusPacketHeader) > input.size()) {
            LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}",
-                      sizeof(OpusHeader), input.size());
+                      sizeof(OpusPacketHeader), input.size());
            return false;
        }

-        OpusHeader hdr{};
-        std::memcpy(&hdr, input.data(), sizeof(OpusHeader));
-        if (sizeof(OpusHeader) + static_cast<u32>(hdr.sz) > input.size()) {
+        OpusPacketHeader hdr{};
+        std::memcpy(&hdr, input.data(), sizeof(OpusPacketHeader));
+        if (sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size) > input.size()) {
            LOG_ERROR(Audio, "Input does not fit in the opus header size. data_sz={}, input_sz={}",
-                      sizeof(OpusHeader) + static_cast<u32>(hdr.sz), input.size());
+                      sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size), input.size());
            return false;
        }

-        const auto frame = input.data() + sizeof(OpusHeader);
+        const auto frame = input.data() + sizeof(OpusPacketHeader);
        const auto decoded_sample_count = opus_packet_get_nb_samples(
-            frame, static_cast<opus_int32>(input.size() - sizeof(OpusHeader)),
+            frame, static_cast<opus_int32>(input.size() - sizeof(OpusPacketHeader)),
            static_cast<opus_int32>(sample_rate));
        if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) {
            LOG_ERROR(
@@ -141,18 +135,18 @@ private:

        const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count));
        const auto out_sample_count =
-            opus_decode(decoder.get(), frame, hdr.sz, output.data(), frame_size, 0);
+            opus_decode(decoder.get(), frame, hdr.size, output.data(), frame_size, 0);
        if (out_sample_count < 0) {
            LOG_ERROR(Audio,
                      "Incorrect sample count received from opus_decode, "
                      "output_sample_count={}, frame_size={}, data_sz_from_hdr={}",
-                      out_sample_count, frame_size, static_cast<u32>(hdr.sz));
+                      out_sample_count, frame_size, static_cast<u32>(hdr.size));
            return false;
        }

        const auto end_time = std::chrono::high_resolution_clock::now() - start_time;
        sample_count = out_sample_count;
-        consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz);
+        consumed = static_cast<u32>(sizeof(OpusPacketHeader) + hdr.size);
        if (out_performance_time != nullptr) {
            *out_performance_time =
                std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count();
@@ -167,21 +161,66 @@ private:
        opus_decoder_ctl(decoder.get(), OPUS_RESET_STATE);
    }

-    struct OpusHeader {
-        u32_be sz; // Needs to be BE for some odd reason
-        INSERT_PADDING_WORDS(1);
-    };
-    static_assert(sizeof(OpusHeader) == 0x8, "OpusHeader is an invalid size");
-
-    std::unique_ptr<OpusDecoder, OpusDeleter> decoder;
+    OpusDecoderPtr decoder;
    u32 sample_rate;
    u32 channel_count;
 };

-static std::size_t WorkerBufferSize(u32 channel_count) {
+class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
+public:
+    explicit IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoderStateBase> decoder_state)
+        : ServiceFramework("IHardwareOpusDecoderManager"), decoder_state{std::move(decoder_state)} {
+        // clang-format off
+        static const FunctionInfo functions[] = {
+            {0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
+            {1, nullptr, "SetContext"},
+            {2, nullptr, "DecodeInterleavedForMultiStreamOld"},
+            {3, nullptr, "SetContextForMultiStream"},
+            {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
+            {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
+            {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
+            {7, nullptr, "DecodeInterleavedForMultiStream"},
+        };
+        // clang-format on
+
+        RegisterHandlers(functions);
+    }
+
+private:
+    void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) {
+        LOG_DEBUG(Audio, "called");
+
+        decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Disabled,
+                                         OpusDecoderStateBase::ExtraBehavior::None);
+    }
+
+    void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
+        LOG_DEBUG(Audio, "called");
+
+        decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Enabled,
+                                         OpusDecoderStateBase::ExtraBehavior::None);
+    }
+
+    void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
+        LOG_DEBUG(Audio, "called");
+
+        IPC::RequestParser rp{ctx};
+        const auto extra_behavior = rp.Pop<bool>()
+                                        ? OpusDecoderStateBase::ExtraBehavior::ResetContext
+                                        : OpusDecoderStateBase::ExtraBehavior::None;
+
+        decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Enabled,
+                                         extra_behavior);
+    }
+
+    std::unique_ptr<OpusDecoderStateBase> decoder_state;
+};
+
+std::size_t WorkerBufferSize(u32 channel_count) {
    ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
    return opus_decoder_get_size(static_cast<int>(channel_count));
 }
+} // Anonymous namespace

 void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {
    IPC::RequestParser rp{ctx};
@@ -220,8 +259,7 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
    const std::size_t worker_sz = WorkerBufferSize(channel_count);
    ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large");

-    std::unique_ptr<OpusDecoder, OpusDeleter> decoder{
-        static_cast<OpusDecoder*>(operator new(worker_sz))};
+    OpusDecoderPtr decoder{static_cast<OpusDecoder*>(operator new(worker_sz))};
    if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) {
        LOG_ERROR(Audio, "Failed to init opus decoder with error={}", err);
        IPC::ResponseBuilder rb{ctx, 2};
@@ -232,8 +270,8 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {

    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
    rb.Push(RESULT_SUCCESS);
-    rb.PushIpcInterface<IHardwareOpusDecoderManager>(std::move(decoder), sample_rate,
-                                                     channel_count);
+    rb.PushIpcInterface<IHardwareOpusDecoderManager>(
+        std::make_unique<OpusDecoderState>(std::move(decoder), sample_rate, channel_count));
 }

 HwOpus::HwOpus() : ServiceFramework("hwopus") {
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -36,7 +36,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3

    auto& instance = Core::System::GetInstance();
    instance.GetPerfStats().EndGameFrame();
-    instance.Renderer().SwapBuffers(framebuffer);
+    instance.GPU().SwapBuffers(framebuffer);
 }

 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -10,6 +10,7 @@
 #include "core/core.h"
 #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
 #include "core/hle/service/nvdrv/devices/nvmap.h"
+#include "core/memory.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_base.h"
@@ -178,7 +179,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
    auto& gpu = system_instance.GPU();
    auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
    ASSERT(cpu_addr);
-    system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
+    gpu.FlushAndInvalidateRegion(ToCacheAddr(Memory::GetPointer(*cpu_addr)), itr->second.size);

    params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);

--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -136,16 +136,6 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
    return 0;
 }

-static void PushGPUEntries(Tegra::CommandList&& entries) {
-    if (entries.empty()) {
-        return;
-    }
-
-    auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()};
-    dma_pusher.Push(std::move(entries));
-    dma_pusher.DispatchCalls();
-}
-
 u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
    if (input.size() < sizeof(IoctlSubmitGpfifo)) {
        UNIMPLEMENTED();
@@ -163,7 +153,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
    std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
                params.num_entries * sizeof(Tegra::CommandListHeader));

-    PushGPUEntries(std::move(entries));
+    Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));

    params.fence_out.id = 0;
    params.fence_out.value = 0;
@@ -184,7 +174,7 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
    Memory::ReadBlock(params.address, entries.data(),
                      params.num_entries * sizeof(Tegra::CommandListHeader));

-    PushGPUEntries(std::move(entries));
+    Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));

    params.fence_out.id = 0;
    params.fence_out.value = 0;
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -186,7 +186,7 @@ void NVFlinger::Compose() {

            // There was no queued buffer to draw, render previous frame
            system_instance.GetPerfStats().EndGameFrame();
-            system_instance.Renderer().SwapBuffers({});
+            system_instance.GPU().SwapBuffers({});
            continue;
        }

--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -11,7 +11,6 @@
 #include "core/hle/ipc.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/client_port.h"
-#include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/server_port.h"
@@ -76,7 +75,8 @@ namespace Service {
 * Creates a function string for logging, complete with the name (or header code, depending
 * on what's passed in) the port name, and all the cmd_buff arguments.
 */
-[[maybe_unused]] static std::string MakeFunctionString(const char* name, const char* port_name,
+[[maybe_unused]] static std::string MakeFunctionString(std::string_view name,
+                                                       std::string_view port_name,
                                                       const u32* cmd_buff) {
    // Number of params == bits 0-5 + bits 6-11
    int num_params = (cmd_buff[0] & 0x3F) + ((cmd_buff[0] >> 6) & 0x3F);
@@ -158,9 +158,7 @@ void ServiceFrameworkBase::InvokeRequest(Kernel::HLERequestContext& ctx) {
        return ReportUnimplementedFunction(ctx, info);
    }

-    LOG_TRACE(
-        Service, "{}",
-        MakeFunctionString(info->name, GetServiceName().c_str(), ctx.CommandBuffer()).c_str());
+    LOG_TRACE(Service, "{}", MakeFunctionString(info->name, GetServiceName(), ctx.CommandBuffer()));
    handler_invoker(this, info->handler_callback, ctx);
 }

@@ -169,7 +167,7 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co
    case IPC::CommandType::Close: {
        IPC::ResponseBuilder rb{context, 2};
        rb.Push(RESULT_SUCCESS);
-        return ResultCode(ErrorModule::HIPC, ErrorDescription::RemoteProcessDead);
+        return IPC::ERR_REMOTE_PROCESS_DEAD;
    }
    case IPC::CommandType::ControlWithContext:
    case IPC::CommandType::Control: {
--- a/src/core/hle/service/sm/controller.cpp
+++ b/src/core/hle/service/sm/controller.cpp
@@ -30,7 +30,7 @@ void Controller::DuplicateSession(Kernel::HLERequestContext& ctx) {

    IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
    rb.Push(RESULT_SUCCESS);
-    Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->parent->client};
+    Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->GetParent()->client};
    rb.PushMoveObjects(session);

    LOG_DEBUG(Service, "session={}", session->GetObjectId());
--- a/src/core/hle/service/sm/sm.h
+++ b/src/core/hle/service/sm/sm.h
@@ -67,7 +67,7 @@ public:
        if (port == nullptr) {
            return nullptr;
        }
-        return std::static_pointer_cast<T>(port->hle_handler);
+        return std::static_pointer_cast<T>(port->GetHLEHandler());
    }

    void InvokeControlRequest(Kernel::HLERequestContext& context);
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -24,6 +24,7 @@
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
 #include "core/hle/service/nvflinger/nvflinger.h"
+#include "core/hle/service/service.h"
 #include "core/hle/service/vi/vi.h"
 #include "core/hle/service/vi/vi_m.h"
 #include "core/hle/service/vi/vi_s.h"
@@ -33,6 +34,7 @@
 namespace Service::VI {

 constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1};
+constexpr ResultCode ERR_PERMISSION_DENIED{ErrorModule::VI, 5};
 constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6};
 constexpr ResultCode ERR_NOT_FOUND{ErrorModule::VI, 7};

@@ -1203,26 +1205,40 @@ IApplicationDisplayService::IApplicationDisplayService(
    RegisterHandlers(functions);
 }

-Module::Interface::Interface(std::shared_ptr<Module> module, const char* name,
-                             std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
-    : ServiceFramework(name), module(std::move(module)), nv_flinger(std::move(nv_flinger)) {}
+static bool IsValidServiceAccess(Permission permission, Policy policy) {
+    if (permission == Permission::User) {
+        return policy == Policy::User;
+    }

-Module::Interface::~Interface() = default;
+    if (permission == Permission::System || permission == Permission::Manager) {
+        return policy == Policy::User || policy == Policy::Compositor;
+    }

-void Module::Interface::GetDisplayService(Kernel::HLERequestContext& ctx) {
-    LOG_WARNING(Service_VI, "(STUBBED) called");
+    return false;
+}
+
+void detail::GetDisplayServiceImpl(Kernel::HLERequestContext& ctx,
+                                   std::shared_ptr<NVFlinger::NVFlinger> nv_flinger,
+                                   Permission permission) {
+    IPC::RequestParser rp{ctx};
+    const auto policy = rp.PopEnum<Policy>();
+
+    if (!IsValidServiceAccess(permission, policy)) {
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(ERR_PERMISSION_DENIED);
+        return;
+    }

    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
    rb.Push(RESULT_SUCCESS);
-    rb.PushIpcInterface<IApplicationDisplayService>(nv_flinger);
+    rb.PushIpcInterface<IApplicationDisplayService>(std::move(nv_flinger));
 }

 void InstallInterfaces(SM::ServiceManager& service_manager,
                       std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) {
-    auto module = std::make_shared<Module>();
-    std::make_shared<VI_M>(module, nv_flinger)->InstallAsService(service_manager);
-    std::make_shared<VI_S>(module, nv_flinger)->InstallAsService(service_manager);
-    std::make_shared<VI_U>(module, nv_flinger)->InstallAsService(service_manager);
+    std::make_shared<VI_M>(nv_flinger)->InstallAsService(service_manager);
+    std::make_shared<VI_S>(nv_flinger)->InstallAsService(service_manager);
+    std::make_shared<VI_U>(nv_flinger)->InstallAsService(service_manager);
 }

 } // namespace Service::VI
--- a/src/core/hle/service/vi/vi.h
+++ b/src/core/hle/service/vi/vi.h
@@ -4,12 +4,21 @@

 #pragma once

-#include "core/hle/service/service.h"
+#include <memory>
+#include "common/common_types.h"
+
+namespace Kernel {
+class HLERequestContext;
+}

 namespace Service::NVFlinger {
 class NVFlinger;
 }

+namespace Service::SM {
+class ServiceManager;
+}
+
 namespace Service::VI {

 enum class DisplayResolution : u32 {
@@ -19,22 +28,25 @@ enum class DisplayResolution : u32 {
    UndockedHeight = 720,
 };

-class Module final {
-public:
-    class Interface : public ServiceFramework<Interface> {
-    public:
-        explicit Interface(std::shared_ptr<Module> module, const char* name,
-                           std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
-        ~Interface() override;
-
-        void GetDisplayService(Kernel::HLERequestContext& ctx);
-
-    protected:
-        std::shared_ptr<Module> module;
-        std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
-    };
+/// Permission level for a particular VI service instance
+enum class Permission {
+    User,
+    System,
+    Manager,
 };

+/// A policy type that may be requested via GetDisplayService and
+/// GetDisplayServiceWithProxyNameExchange
+enum class Policy {
+    User,
+    Compositor,
+};
+
+namespace detail {
+void GetDisplayServiceImpl(Kernel::HLERequestContext& ctx,
+                           std::shared_ptr<NVFlinger::NVFlinger> nv_flinger, Permission permission);
+} // namespace detail
+
 /// Registers all VI services with the specified service manager.
 void InstallInterfaces(SM::ServiceManager& service_manager,
                       std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
--- a/src/core/hle/service/vi/vi_m.cpp
+++ b/src/core/hle/service/vi/vi_m.cpp
@@ -2,12 +2,14 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include "common/logging/log.h"
+#include "core/hle/service/vi/vi.h"
 #include "core/hle/service/vi/vi_m.h"

 namespace Service::VI {

-VI_M::VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
-    : Module::Interface(std::move(module), "vi:m", std::move(nv_flinger)) {
+VI_M::VI_M(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
+    : ServiceFramework{"vi:m"}, nv_flinger{std::move(nv_flinger)} {
    static const FunctionInfo functions[] = {
        {2, &VI_M::GetDisplayService, "GetDisplayService"},
        {3, nullptr, "GetDisplayServiceWithProxyNameExchange"},
@@ -17,4 +19,10 @@ VI_M::VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>

 VI_M::~VI_M() = default;

+void VI_M::GetDisplayService(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_VI, "called");
+
+    detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::Manager);
+}
+
 } // namespace Service::VI
--- a/src/core/hle/service/vi/vi_m.h
+++ b/src/core/hle/service/vi/vi_m.h
@@ -4,14 +4,27 @@

 #pragma once

-#include "core/hle/service/vi/vi.h"
+#include "core/hle/service/service.h"
+
+namespace Kernel {
+class HLERequestContext;
+}
+
+namespace Service::NVFlinger {
+class NVFlinger;
+}

 namespace Service::VI {

-class VI_M final : public Module::Interface {
+class VI_M final : public ServiceFramework<VI_M> {
 public:
-    explicit VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
+    explicit VI_M(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
    ~VI_M() override;
+
+private:
+    void GetDisplayService(Kernel::HLERequestContext& ctx);
+
+    std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
 };

 } // namespace Service::VI
--- a/src/core/hle/service/vi/vi_s.cpp
+++ b/src/core/hle/service/vi/vi_s.cpp
@@ -2,12 +2,14 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include "common/logging/log.h"
+#include "core/hle/service/vi/vi.h"
 #include "core/hle/service/vi/vi_s.h"

 namespace Service::VI {

-VI_S::VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
-    : Module::Interface(std::move(module), "vi:s", std::move(nv_flinger)) {
+VI_S::VI_S(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
+    : ServiceFramework{"vi:s"}, nv_flinger{std::move(nv_flinger)} {
    static const FunctionInfo functions[] = {
        {1, &VI_S::GetDisplayService, "GetDisplayService"},
        {3, nullptr, "GetDisplayServiceWithProxyNameExchange"},
@@ -17,4 +19,10 @@ VI_S::VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>

 VI_S::~VI_S() = default;

+void VI_S::GetDisplayService(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_VI, "called");
+
+    detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::System);
+}
+
 } // namespace Service::VI
--- a/src/core/hle/service/vi/vi_s.h
+++ b/src/core/hle/service/vi/vi_s.h
@@ -4,14 +4,27 @@

 #pragma once

-#include "core/hle/service/vi/vi.h"
+#include "core/hle/service/service.h"
+
+namespace Kernel {
+class HLERequestContext;
+}
+
+namespace Service::NVFlinger {
+class NVFlinger;
+}

 namespace Service::VI {

-class VI_S final : public Module::Interface {
+class VI_S final : public ServiceFramework<VI_S> {
 public:
-    explicit VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
+    explicit VI_S(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
    ~VI_S() override;
+
+private:
+    void GetDisplayService(Kernel::HLERequestContext& ctx);
+
+    std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
 };

 } // namespace Service::VI
--- a/src/core/hle/service/vi/vi_u.cpp
+++ b/src/core/hle/service/vi/vi_u.cpp
@@ -2,12 +2,14 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include "common/logging/log.h"
+#include "core/hle/service/vi/vi.h"
 #include "core/hle/service/vi/vi_u.h"

 namespace Service::VI {

-VI_U::VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
-    : Module::Interface(std::move(module), "vi:u", std::move(nv_flinger)) {
+VI_U::VI_U(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger)
+    : ServiceFramework{"vi:u"}, nv_flinger{std::move(nv_flinger)} {
    static const FunctionInfo functions[] = {
        {0, &VI_U::GetDisplayService, "GetDisplayService"},
    };
@@ -16,4 +18,10 @@ VI_U::VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger>

 VI_U::~VI_U() = default;

+void VI_U::GetDisplayService(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_VI, "called");
+
+    detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::User);
+}
+
 } // namespace Service::VI
--- a/src/core/hle/service/vi/vi_u.h
+++ b/src/core/hle/service/vi/vi_u.h
@@ -4,14 +4,27 @@

 #pragma once

-#include "core/hle/service/vi/vi.h"
+#include "core/hle/service/service.h"
+
+namespace Kernel {
+class HLERequestContext;
+}
+
+namespace Service::NVFlinger {
+class NVFlinger;
+}

 namespace Service::VI {

-class VI_U final : public Module::Interface {
+class VI_U final : public ServiceFramework<VI_U> {
 public:
-    explicit VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
+    explicit VI_U(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger);
    ~VI_U() override;
+
+private:
+    void GetDisplayService(Kernel::HLERequestContext& ctx);
+
+    std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
 };

 } // namespace Service::VI
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -10,6 +10,7 @@
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
+#include "common/page_table.h"
 #include "common/swap.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
@@ -18,13 +19,14 @@
 #include "core/hle/lock.h"
 #include "core/memory.h"
 #include "core/memory_setup.h"
+#include "video_core/gpu.h"
 #include "video_core/renderer_base.h"

 namespace Memory {

-static PageTable* current_page_table = nullptr;
+static Common::PageTable* current_page_table = nullptr;

-void SetCurrentPageTable(PageTable* page_table) {
+void SetCurrentPageTable(Common::PageTable* page_table) {
    current_page_table = page_table;

    auto& system = Core::System::GetInstance();
@@ -36,39 +38,20 @@ void SetCurrentPageTable(PageTable* page_table) {
    }
 }

-PageTable* GetCurrentPageTable() {
+Common::PageTable* GetCurrentPageTable() {
    return current_page_table;
 }

-PageTable::PageTable() = default;
-
-PageTable::PageTable(std::size_t address_space_width_in_bits) {
-    Resize(address_space_width_in_bits);
-}
-
-PageTable::~PageTable() = default;
-
-void PageTable::Resize(std::size_t address_space_width_in_bits) {
-    const std::size_t num_page_table_entries = 1ULL << (address_space_width_in_bits - PAGE_BITS);
-
-    pointers.resize(num_page_table_entries);
-    attributes.resize(num_page_table_entries);
-
-    // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
-    // vector size is subsequently decreased (via resize), the vector might not automatically
-    // actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for
-    // 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use.
-
-    pointers.shrink_to_fit();
-    attributes.shrink_to_fit();
-}
-
-static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, PageType type) {
+static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory,
+                     Common::PageType type) {
    LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE,
              (base + size) * PAGE_SIZE);

-    RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE,
-                                 FlushMode::FlushAndInvalidate);
+    // During boot, current_page_table might not be set yet, in which case we need not flush
+    if (current_page_table) {
+        Core::System::GetInstance().GPU().FlushAndInvalidateRegion(base << PAGE_BITS,
+                                                                   size * PAGE_SIZE);
+    }

    VAddr end = base + size;
    ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
@@ -88,41 +71,47 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa
    }
 }

-void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target) {
+void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
    ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
    ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
-    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, PageType::Memory);
+    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory);
 }

-void MapIoRegion(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer mmio_handler) {
+void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                 Common::MemoryHookPointer mmio_handler) {
    ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
    ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
-    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Special);
+    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, Common::PageType::Special);

    auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
-    SpecialRegion region{SpecialRegion::Type::IODevice, std::move(mmio_handler)};
-    page_table.special_regions.add(std::make_pair(interval, std::set<SpecialRegion>{region}));
+    Common::SpecialRegion region{Common::SpecialRegion::Type::IODevice, std::move(mmio_handler)};
+    page_table.special_regions.add(
+        std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
 }

-void UnmapRegion(PageTable& page_table, VAddr base, u64 size) {
+void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) {
    ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
    ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
-    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Unmapped);
+    MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, Common::PageType::Unmapped);

    auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
    page_table.special_regions.erase(interval);
 }

-void AddDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook) {
+void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                  Common::MemoryHookPointer hook) {
    auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
-    SpecialRegion region{SpecialRegion::Type::DebugHook, std::move(hook)};
-    page_table.special_regions.add(std::make_pair(interval, std::set<SpecialRegion>{region}));
+    Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)};
+    page_table.special_regions.add(
+        std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
 }

-void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook) {
+void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                     Common::MemoryHookPointer hook) {
    auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
-    SpecialRegion region{SpecialRegion::Type::DebugHook, std::move(hook)};
-    page_table.special_regions.subtract(std::make_pair(interval, std::set<SpecialRegion>{region}));
+    Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)};
+    page_table.special_regions.subtract(
+        std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
 }

 /**
@@ -171,22 +160,19 @@ T Read(const VAddr vaddr) {
        return value;
    }

-    // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
-
-    PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
+    Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
    switch (type) {
-    case PageType::Unmapped:
+    case Common::PageType::Unmapped:
        LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr);
        return 0;
-    case PageType::Memory:
+    case Common::PageType::Memory:
        ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
        break;
-    case PageType::RasterizerCachedMemory: {
-        RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Flush);
-
+    case Common::PageType::RasterizerCachedMemory: {
+        auto host_ptr{GetPointerFromVMA(vaddr)};
+        Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T));
        T value;
-        std::memcpy(&value, GetPointerFromVMA(vaddr), sizeof(T));
+        std::memcpy(&value, host_ptr, sizeof(T));
        return value;
    }
    default:
@@ -204,21 +190,19 @@ void Write(const VAddr vaddr, const T data) {
        return;
    }

-    // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
-
-    PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
+    Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
    switch (type) {
-    case PageType::Unmapped:
+    case Common::PageType::Unmapped:
        LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
                  static_cast<u32>(data), vaddr);
        return;
-    case PageType::Memory:
+    case Common::PageType::Memory:
        ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
        break;
-    case PageType::RasterizerCachedMemory: {
-        RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate);
-        std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T));
+    case Common::PageType::RasterizerCachedMemory: {
+        auto host_ptr{GetPointerFromVMA(vaddr)};
+        Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T));
+        std::memcpy(host_ptr, &data, sizeof(T));
        break;
    }
    default:
@@ -233,10 +217,10 @@ bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) {
    if (page_pointer)
        return true;

-    if (page_table.attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory)
+    if (page_table.attributes[vaddr >> PAGE_BITS] == Common::PageType::RasterizerCachedMemory)
        return true;

-    if (page_table.attributes[vaddr >> PAGE_BITS] != PageType::Special)
+    if (page_table.attributes[vaddr >> PAGE_BITS] != Common::PageType::Special)
        return false;

    return false;
@@ -256,7 +240,8 @@ u8* GetPointer(const VAddr vaddr) {
        return page_pointer + (vaddr & PAGE_MASK);
    }

-    if (current_page_table->attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) {
+    if (current_page_table->attributes[vaddr >> PAGE_BITS] ==
+        Common::PageType::RasterizerCachedMemory) {
        return GetPointerFromVMA(vaddr);
    }

@@ -290,20 +275,20 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {

    u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1;
    for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) {
-        PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
+        Common::PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];

        if (cached) {
            // Switch page type to cached if now cached
            switch (page_type) {
-            case PageType::Unmapped:
+            case Common::PageType::Unmapped:
                // It is not necessary for a process to have this region mapped into its address
                // space, for example, a system module need not have a VRAM mapping.
                break;
-            case PageType::Memory:
-                page_type = PageType::RasterizerCachedMemory;
+            case Common::PageType::Memory:
+                page_type = Common::PageType::RasterizerCachedMemory;
                current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr;
                break;
-            case PageType::RasterizerCachedMemory:
+            case Common::PageType::RasterizerCachedMemory:
                // There can be more than one GPU region mapped per CPU region, so it's common that
                // this area is already marked as cached.
                break;
@@ -313,23 +298,23 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
        } else {
            // Switch page type to uncached if now uncached
            switch (page_type) {
-            case PageType::Unmapped:
+            case Common::PageType::Unmapped:
                // It is not necessary for a process to have this region mapped into its address
                // space, for example, a system module need not have a VRAM mapping.
                break;
-            case PageType::Memory:
+            case Common::PageType::Memory:
                // There can be more than one GPU region mapped per CPU region, so it's common that
                // this area is already unmarked as cached.
                break;
-            case PageType::RasterizerCachedMemory: {
+            case Common::PageType::RasterizerCachedMemory: {
                u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK);
                if (pointer == nullptr) {
                    // It's possible that this function has been called while updating the pagetable
                    // after unmapping a VMA. In that case the underlying VMA will no longer exist,
                    // and we should just leave the pagetable entry blank.
-                    page_type = PageType::Unmapped;
+                    page_type = Common::PageType::Unmapped;
                } else {
-                    page_type = PageType::Memory;
+                    page_type = Common::PageType::Memory;
                    current_page_table->pointers[vaddr >> PAGE_BITS] = pointer;
                }
                break;
@@ -341,47 +326,6 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
    }
 }

-void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
-    auto& system_instance = Core::System::GetInstance();
-
-    // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be
-    // null here
-    if (!system_instance.IsPoweredOn()) {
-        return;
-    }
-
-    const VAddr end = start + size;
-
-    const auto CheckRegion = [&](VAddr region_start, VAddr region_end) {
-        if (start >= region_end || end <= region_start) {
-            // No overlap with region
-            return;
-        }
-
-        const VAddr overlap_start = std::max(start, region_start);
-        const VAddr overlap_end = std::min(end, region_end);
-        const VAddr overlap_size = overlap_end - overlap_start;
-
-        auto& rasterizer = system_instance.Renderer().Rasterizer();
-        switch (mode) {
-        case FlushMode::Flush:
-            rasterizer.FlushRegion(overlap_start, overlap_size);
-            break;
-        case FlushMode::Invalidate:
-            rasterizer.InvalidateRegion(overlap_start, overlap_size);
-            break;
-        case FlushMode::FlushAndInvalidate:
-            rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size);
-            break;
-        }
-    };
-
-    const auto& vm_manager = Core::CurrentProcess()->VMManager();
-
-    CheckRegion(vm_manager.GetCodeRegionBaseAddress(), vm_manager.GetCodeRegionEndAddress());
-    CheckRegion(vm_manager.GetHeapRegionBaseAddress(), vm_manager.GetHeapRegionEndAddress());
-}
-
 u8 Read8(const VAddr addr) {
    return Read<u8>(addr);
 }
@@ -412,24 +356,24 @@ void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_
        const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);

        switch (page_table.attributes[page_index]) {
-        case PageType::Unmapped: {
+        case Common::PageType::Unmapped: {
            LOG_ERROR(HW_Memory,
                      "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                      current_vaddr, src_addr, size);
            std::memset(dest_buffer, 0, copy_amount);
            break;
        }
-        case PageType::Memory: {
+        case Common::PageType::Memory: {
            DEBUG_ASSERT(page_table.pointers[page_index]);

            const u8* src_ptr = page_table.pointers[page_index] + page_offset;
            std::memcpy(dest_buffer, src_ptr, copy_amount);
            break;
        }
-        case PageType::RasterizerCachedMemory: {
-            RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
-                                         FlushMode::Flush);
-            std::memcpy(dest_buffer, GetPointerFromVMA(process, current_vaddr), copy_amount);
+        case Common::PageType::RasterizerCachedMemory: {
+            const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
+            Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+            std::memcpy(dest_buffer, host_ptr, copy_amount);
            break;
        }
        default:
@@ -476,23 +420,23 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi
        const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);

        switch (page_table.attributes[page_index]) {
-        case PageType::Unmapped: {
+        case Common::PageType::Unmapped: {
            LOG_ERROR(HW_Memory,
                      "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                      current_vaddr, dest_addr, size);
            break;
        }
-        case PageType::Memory: {
+        case Common::PageType::Memory: {
            DEBUG_ASSERT(page_table.pointers[page_index]);

            u8* dest_ptr = page_table.pointers[page_index] + page_offset;
            std::memcpy(dest_ptr, src_buffer, copy_amount);
            break;
        }
-        case PageType::RasterizerCachedMemory: {
-            RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
-                                         FlushMode::Invalidate);
-            std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount);
+        case Common::PageType::RasterizerCachedMemory: {
+            const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
+            Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
+            std::memcpy(host_ptr, src_buffer, copy_amount);
            break;
        }
        default:
@@ -522,23 +466,23 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std:
        const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);

        switch (page_table.attributes[page_index]) {
-        case PageType::Unmapped: {
+        case Common::PageType::Unmapped: {
            LOG_ERROR(HW_Memory,
                      "Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                      current_vaddr, dest_addr, size);
            break;
        }
-        case PageType::Memory: {
+        case Common::PageType::Memory: {
            DEBUG_ASSERT(page_table.pointers[page_index]);

            u8* dest_ptr = page_table.pointers[page_index] + page_offset;
            std::memset(dest_ptr, 0, copy_amount);
            break;
        }
-        case PageType::RasterizerCachedMemory: {
-            RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
-                                         FlushMode::Invalidate);
-            std::memset(GetPointerFromVMA(process, current_vaddr), 0, copy_amount);
+        case Common::PageType::RasterizerCachedMemory: {
+            const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
+            Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
+            std::memset(host_ptr, 0, copy_amount);
            break;
        }
        default:
@@ -564,23 +508,23 @@ void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr,
        const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);

        switch (page_table.attributes[page_index]) {
-        case PageType::Unmapped: {
+        case Common::PageType::Unmapped: {
            LOG_ERROR(HW_Memory,
                      "Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
                      current_vaddr, src_addr, size);
            ZeroBlock(process, dest_addr, copy_amount);
            break;
        }
-        case PageType::Memory: {
+        case Common::PageType::Memory: {
            DEBUG_ASSERT(page_table.pointers[page_index]);
            const u8* src_ptr = page_table.pointers[page_index] + page_offset;
            WriteBlock(process, dest_addr, src_ptr, copy_amount);
            break;
        }
-        case PageType::RasterizerCachedMemory: {
-            RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
-                                         FlushMode::Flush);
-            WriteBlock(process, dest_addr, GetPointerFromVMA(process, current_vaddr), copy_amount);
+        case Common::PageType::RasterizerCachedMemory: {
+            const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
+            Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+            WriteBlock(process, dest_addr, host_ptr, copy_amount);
            break;
        }
        default:
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -10,7 +10,10 @@
 #include <vector>
 #include <boost/icl/interval_map.hpp>
 #include "common/common_types.h"
-#include "core/memory_hook.h"
+
+namespace Common {
+struct PageTable;
+}

 namespace Kernel {
 class Process;
@@ -26,71 +29,6 @@ constexpr std::size_t PAGE_BITS = 12;
 constexpr u64 PAGE_SIZE = 1ULL << PAGE_BITS;
 constexpr u64 PAGE_MASK = PAGE_SIZE - 1;

-enum class PageType : u8 {
-    /// Page is unmapped and should cause an access error.
-    Unmapped,
-    /// Page is mapped to regular memory. This is the only type you can get pointers to.
-    Memory,
-    /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
-    /// invalidation
-    RasterizerCachedMemory,
-    /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
-    Special,
-};
-
-struct SpecialRegion {
-    enum class Type {
-        DebugHook,
-        IODevice,
-    } type;
-
-    MemoryHookPointer handler;
-
-    bool operator<(const SpecialRegion& other) const {
-        return std::tie(type, handler) < std::tie(other.type, other.handler);
-    }
-
-    bool operator==(const SpecialRegion& other) const {
-        return std::tie(type, handler) == std::tie(other.type, other.handler);
-    }
-};
-
-/**
- * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
- * mimics the way a real CPU page table works.
- */
-struct PageTable {
-    explicit PageTable();
-    explicit PageTable(std::size_t address_space_width_in_bits);
-    ~PageTable();
-
-    /**
-     * Resizes the page table to be able to accomodate enough pages within
-     * a given address space.
-     *
-     * @param address_space_width_in_bits The address size width in bits.
-     */
-    void Resize(std::size_t address_space_width_in_bits);
-
-    /**
-     * Vector of memory pointers backing each page. An entry can only be non-null if the
-     * corresponding entry in the `attributes` vector is of type `Memory`.
-     */
-    std::vector<u8*> pointers;
-
-    /**
-     * Contains MMIO handlers that back memory regions whose entries in the `attribute` vector is
-     * of type `Special`.
-     */
-    boost::icl::interval_map<VAddr, std::set<SpecialRegion>> special_regions;
-
-    /**
-     * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
-     * the corresponding entry in `pointers` MUST be set to null.
-     */
-    std::vector<PageType> attributes;
-};
-
 /// Virtual user-space memory regions
 enum : VAddr {
    /// Read-only page containing kernel and system configuration values.
@@ -116,8 +54,8 @@ enum : VAddr {
 };

 /// Currently active page table
-void SetCurrentPageTable(PageTable* page_table);
-PageTable* GetCurrentPageTable();
+void SetCurrentPageTable(Common::PageTable* page_table);
+Common::PageTable* GetCurrentPageTable();

 /// Determines if the given VAddr is valid for the specified process.
 bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr);
@@ -161,10 +99,4 @@ enum class FlushMode {
 */
 void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached);

-/**
- * Flushes and invalidates any externally cached rasterizer resources touching the given virtual
- * address region.
- */
-void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode);
-
 } // namespace Memory
--- a/src/core/memory_setup.h
+++ b/src/core/memory_setup.h
@@ -5,7 +5,11 @@
 #pragma once

 #include "common/common_types.h"
-#include "core/memory_hook.h"
+#include "common/memory_hook.h"
+
+namespace Common {
+struct PageTable;
+}

 namespace Memory {

@@ -17,7 +21,7 @@ namespace Memory {
 * @param size The amount of bytes to map. Must be page-aligned.
 * @param target Buffer with the memory backing the mapping. Must be of length at least `size`.
 */
-void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target);
+void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target);

 /**
 * Maps a region of the emulated process address space as a IO region.
@@ -26,11 +30,14 @@ void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target);
 * @param size The amount of bytes to map. Must be page-aligned.
 * @param mmio_handler The handler that backs the mapping.
 */
-void MapIoRegion(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer mmio_handler);
+void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
+                 Common::MemoryHookPointer mmio_handler);

-void UnmapRegion(PageTable& page_table, VAddr base, u64 size);
+void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size);

-void AddDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook);
-void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook);
+void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                  Common::MemoryHookPointer hook);
+void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
+                     Common::MemoryHookPointer hook);

 } // namespace Memory
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -91,7 +91,10 @@ void LogSettings() {
    LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
    LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
    LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
+    LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
    LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
+    LogSetting("Renderer_UseAsynchronousGpuEmulation",
+               Settings::values.use_asynchronous_gpu_emulation);
    LogSetting("Audio_OutputEngine", Settings::values.sink_id);
    LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
    LogSetting("Audio_OutputDevice", Settings::values.audio_device_id);
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -393,6 +393,7 @@ struct Values {
    u16 frame_limit;
    bool use_disk_shader_cache;
    bool use_accurate_gpu_emulation;
+    bool use_asynchronous_gpu_emulation;

    float bg_red;
    float bg_green;
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -162,6 +162,8 @@ TelemetrySession::TelemetrySession() {
             Settings::values.use_disk_shader_cache);
    AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
             Settings::values.use_accurate_gpu_emulation);
+    AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAsynchronousGpuEmulation",
+             Settings::values.use_asynchronous_gpu_emulation);
    AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
             Settings::values.use_docked_mode);
 }
--- a/src/input_common/CMakeLists.txt
+++ b/src/input_common/CMakeLists.txt
@@ -7,15 +7,18 @@ add_library(input_common STATIC
    main.h
    motion_emu.cpp
    motion_emu.h
-
-    $<$<BOOL:${SDL2_FOUND}>:sdl/sdl.cpp sdl/sdl.h>
+    sdl/sdl.cpp
+    sdl/sdl.h
 )

-create_target_directory_groups(input_common)
-
-target_link_libraries(input_common PUBLIC core PRIVATE common)
-
 if(SDL2_FOUND)
+    target_sources(input_common PRIVATE
+        sdl/sdl_impl.cpp
+        sdl/sdl_impl.h
+    )
    target_link_libraries(input_common PRIVATE SDL2)
    target_compile_definitions(input_common PRIVATE HAVE_SDL2)
 endif()
+
+create_target_directory_groups(input_common)
+target_link_libraries(input_common PUBLIC core PRIVATE common)
--- a/src/input_common/main.cpp
+++ b/src/input_common/main.cpp
@@ -17,10 +17,7 @@ namespace InputCommon {

 static std::shared_ptr<Keyboard> keyboard;
 static std::shared_ptr<MotionEmu> motion_emu;
-
-#ifdef HAVE_SDL2
-static std::thread poll_thread;
-#endif
+static std::unique_ptr<SDL::State> sdl;

 void Init() {
    keyboard = std::make_shared<Keyboard>();
@@ -30,15 +27,7 @@ void Init() {
    motion_emu = std::make_shared<MotionEmu>();
    Input::RegisterFactory<Input::MotionDevice>("motion_emu", motion_emu);

-#ifdef HAVE_SDL2
-    SDL::Init();
-#endif
-}
-
-void StartJoystickEventHandler() {
-#ifdef HAVE_SDL2
-    poll_thread = std::thread(SDL::PollLoop);
-#endif
+    sdl = SDL::Init();
 }

 void Shutdown() {
@@ -47,11 +36,7 @@ void Shutdown() {
    Input::UnregisterFactory<Input::AnalogDevice>("analog_from_button");
    Input::UnregisterFactory<Input::MotionDevice>("motion_emu");
    motion_emu.reset();
-
-#ifdef HAVE_SDL2
-    SDL::Shutdown();
-    poll_thread.join();
-#endif
+    sdl.reset();
 }

 Keyboard* GetKeyboard() {
@@ -88,7 +73,7 @@ namespace Polling {

 std::vector<std::unique_ptr<DevicePoller>> GetPollers(DeviceType type) {
 #ifdef HAVE_SDL2
-    return SDL::Polling::GetPollers(type);
+    return sdl->GetPollers(type);
 #else
    return {};
 #endif
--- a/src/input_common/main.h
+++ b/src/input_common/main.h
@@ -20,8 +20,6 @@ void Init();
 /// Deregisters all built-in input device factories and shuts them down.
 void Shutdown();

-void StartJoystickEventHandler();
-
 class Keyboard;

 /// Gets the keyboard button device factory.
--- a/src/input_common/sdl/sdl.cpp
+++ b/src/input_common/sdl/sdl.cpp
@@ -1,631 +1,19 @@
-// Copyright 2017 Citra Emulator Project
+// Copyright 2018 Citra Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include <algorithm>
-#include <atomic>
-#include <cmath>
-#include <functional>
-#include <iterator>
-#include <mutex>
-#include <string>
-#include <thread>
-#include <tuple>
-#include <unordered_map>
-#include <utility>
-#include <vector>
-#include <SDL.h>
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "common/math_util.h"
-#include "common/param_package.h"
-#include "common/threadsafe_queue.h"
-#include "input_common/main.h"
 #include "input_common/sdl/sdl.h"
+#ifdef HAVE_SDL2
+#include "input_common/sdl/sdl_impl.h"
+#endif

-namespace InputCommon {
+namespace InputCommon::SDL {

-namespace SDL {
-
-class SDLJoystick;
-class SDLButtonFactory;
-class SDLAnalogFactory;
-
-/// Map of GUID of a list of corresponding virtual Joysticks
-static std::unordered_map<std::string, std::vector<std::shared_ptr<SDLJoystick>>> joystick_map;
-static std::mutex joystick_map_mutex;
-
-static std::shared_ptr<SDLButtonFactory> button_factory;
-static std::shared_ptr<SDLAnalogFactory> analog_factory;
-
-/// Used by the Pollers during config
-static std::atomic<bool> polling;
-static Common::SPSCQueue<SDL_Event> event_queue;
-
-static std::atomic<bool> initialized = false;
-
-static std::string GetGUID(SDL_Joystick* joystick) {
-    SDL_JoystickGUID guid = SDL_JoystickGetGUID(joystick);
-    char guid_str[33];
-    SDL_JoystickGetGUIDString(guid, guid_str, sizeof(guid_str));
-    return guid_str;
+std::unique_ptr<State> Init() {
+#ifdef HAVE_SDL2
+    return std::make_unique<SDLState>();
+#else
+    return std::make_unique<NullState>();
+#endif
 }
-
-class SDLJoystick {
-public:
-    SDLJoystick(std::string guid_, int port_, SDL_Joystick* joystick,
-                decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose)
-        : guid{std::move(guid_)}, port{port_}, sdl_joystick{joystick, deleter} {}
-
-    void SetButton(int button, bool value) {
-        std::lock_guard<std::mutex> lock(mutex);
-        state.buttons[button] = value;
-    }
-
-    bool GetButton(int button) const {
-        std::lock_guard<std::mutex> lock(mutex);
-        return state.buttons.at(button);
-    }
-
-    void SetAxis(int axis, Sint16 value) {
-        std::lock_guard<std::mutex> lock(mutex);
-        state.axes[axis] = value;
-    }
-
-    float GetAxis(int axis) const {
-        std::lock_guard<std::mutex> lock(mutex);
-        return state.axes.at(axis) / 32767.0f;
-    }
-
-    std::tuple<float, float> GetAnalog(int axis_x, int axis_y) const {
-        float x = GetAxis(axis_x);
-        float y = GetAxis(axis_y);
-        y = -y; // 3DS uses an y-axis inverse from SDL
-
-        // Make sure the coordinates are in the unit circle,
-        // otherwise normalize it.
-        float r = x * x + y * y;
-        if (r > 1.0f) {
-            r = std::sqrt(r);
-            x /= r;
-            y /= r;
-        }
-
-        return std::make_tuple(x, y);
-    }
-
-    void SetHat(int hat, Uint8 direction) {
-        std::lock_guard<std::mutex> lock(mutex);
-        state.hats[hat] = direction;
-    }
-
-    bool GetHatDirection(int hat, Uint8 direction) const {
-        std::lock_guard<std::mutex> lock(mutex);
-        return (state.hats.at(hat) & direction) != 0;
-    }
-    /**
-     * The guid of the joystick
-     */
-    const std::string& GetGUID() const {
-        return guid;
-    }
-
-    /**
-     * The number of joystick from the same type that were connected before this joystick
-     */
-    int GetPort() const {
-        return port;
-    }
-
-    SDL_Joystick* GetSDLJoystick() const {
-        return sdl_joystick.get();
-    }
-
-    void SetSDLJoystick(SDL_Joystick* joystick,
-                        decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose) {
-        sdl_joystick =
-            std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)>(joystick, deleter);
-    }
-
-private:
-    struct State {
-        std::unordered_map<int, bool> buttons;
-        std::unordered_map<int, Sint16> axes;
-        std::unordered_map<int, Uint8> hats;
-    } state;
-    std::string guid;
-    int port;
-    std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)> sdl_joystick;
-    mutable std::mutex mutex;
-};
-
-/**
- * Get the nth joystick with the corresponding GUID
- */
-static std::shared_ptr<SDLJoystick> GetSDLJoystickByGUID(const std::string& guid, int port) {
-    std::lock_guard<std::mutex> lock(joystick_map_mutex);
-    const auto it = joystick_map.find(guid);
-    if (it != joystick_map.end()) {
-        while (it->second.size() <= port) {
-            auto joystick = std::make_shared<SDLJoystick>(guid, it->second.size(), nullptr,
-                                                          [](SDL_Joystick*) {});
-            it->second.emplace_back(std::move(joystick));
-        }
-        return it->second[port];
-    }
-    auto joystick = std::make_shared<SDLJoystick>(guid, 0, nullptr, [](SDL_Joystick*) {});
-    return joystick_map[guid].emplace_back(std::move(joystick));
-}
-
-/**
- * Check how many identical joysticks (by guid) were connected before the one with sdl_id and so tie
- * it to a SDLJoystick with the same guid and that port
- */
-static std::shared_ptr<SDLJoystick> GetSDLJoystickBySDLID(SDL_JoystickID sdl_id) {
-    std::lock_guard<std::mutex> lock(joystick_map_mutex);
-    auto sdl_joystick = SDL_JoystickFromInstanceID(sdl_id);
-    const std::string guid = GetGUID(sdl_joystick);
-    auto map_it = joystick_map.find(guid);
-    if (map_it != joystick_map.end()) {
-        auto vec_it = std::find_if(map_it->second.begin(), map_it->second.end(),
-                                   [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
-                                       return sdl_joystick == joystick->GetSDLJoystick();
-                                   });
-        if (vec_it != map_it->second.end()) {
-            // This is the common case: There is already an existing SDL_Joystick maped to a
-            // SDLJoystick. return the SDLJoystick
-            return *vec_it;
-        }
-        // Search for a SDLJoystick without a mapped SDL_Joystick...
-        auto nullptr_it = std::find_if(map_it->second.begin(), map_it->second.end(),
-                                       [](const std::shared_ptr<SDLJoystick>& joystick) {
-                                           return !joystick->GetSDLJoystick();
-                                       });
-        if (nullptr_it != map_it->second.end()) {
-            // ... and map it
-            (*nullptr_it)->SetSDLJoystick(sdl_joystick);
-            return *nullptr_it;
-        }
-        // There is no SDLJoystick without a mapped SDL_Joystick
-        // Create a new SDLJoystick
-        auto joystick = std::make_shared<SDLJoystick>(guid, map_it->second.size(), sdl_joystick);
-        return map_it->second.emplace_back(std::move(joystick));
-    }
-    auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
-    return joystick_map[guid].emplace_back(std::move(joystick));
-}
-
-void InitJoystick(int joystick_index) {
-    std::lock_guard<std::mutex> lock(joystick_map_mutex);
-    SDL_Joystick* sdl_joystick = SDL_JoystickOpen(joystick_index);
-    if (!sdl_joystick) {
-        LOG_ERROR(Input, "failed to open joystick {}", joystick_index);
-        return;
-    }
-    std::string guid = GetGUID(sdl_joystick);
-    if (joystick_map.find(guid) == joystick_map.end()) {
-        auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
-        joystick_map[guid].emplace_back(std::move(joystick));
-        return;
-    }
-    auto& joystick_guid_list = joystick_map[guid];
-    const auto it = std::find_if(
-        joystick_guid_list.begin(), joystick_guid_list.end(),
-        [](const std::shared_ptr<SDLJoystick>& joystick) { return !joystick->GetSDLJoystick(); });
-    if (it != joystick_guid_list.end()) {
-        (*it)->SetSDLJoystick(sdl_joystick);
-        return;
-    }
-    auto joystick = std::make_shared<SDLJoystick>(guid, joystick_guid_list.size(), sdl_joystick);
-    joystick_guid_list.emplace_back(std::move(joystick));
-}
-
-void CloseJoystick(SDL_Joystick* sdl_joystick) {
-    std::lock_guard<std::mutex> lock(joystick_map_mutex);
-    std::string guid = GetGUID(sdl_joystick);
-    // This call to guid is save since the joystick is guranteed to be in that map
-    auto& joystick_guid_list = joystick_map[guid];
-    const auto joystick_it =
-        std::find_if(joystick_guid_list.begin(), joystick_guid_list.end(),
-                     [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
-                         return joystick->GetSDLJoystick() == sdl_joystick;
-                     });
-    (*joystick_it)->SetSDLJoystick(nullptr, [](SDL_Joystick*) {});
-}
-
-void HandleGameControllerEvent(const SDL_Event& event) {
-    switch (event.type) {
-    case SDL_JOYBUTTONUP: {
-        auto joystick = GetSDLJoystickBySDLID(event.jbutton.which);
-        if (joystick) {
-            joystick->SetButton(event.jbutton.button, false);
-        }
-        break;
-    }
-    case SDL_JOYBUTTONDOWN: {
-        auto joystick = GetSDLJoystickBySDLID(event.jbutton.which);
-        if (joystick) {
-            joystick->SetButton(event.jbutton.button, true);
-        }
-        break;
-    }
-    case SDL_JOYHATMOTION: {
-        auto joystick = GetSDLJoystickBySDLID(event.jhat.which);
-        if (joystick) {
-            joystick->SetHat(event.jhat.hat, event.jhat.value);
-        }
-        break;
-    }
-    case SDL_JOYAXISMOTION: {
-        auto joystick = GetSDLJoystickBySDLID(event.jaxis.which);
-        if (joystick) {
-            joystick->SetAxis(event.jaxis.axis, event.jaxis.value);
-        }
-        break;
-    }
-    case SDL_JOYDEVICEREMOVED:
-        LOG_DEBUG(Input, "Controller removed with Instance_ID {}", event.jdevice.which);
-        CloseJoystick(SDL_JoystickFromInstanceID(event.jdevice.which));
-        break;
-    case SDL_JOYDEVICEADDED:
-        LOG_DEBUG(Input, "Controller connected with device index {}", event.jdevice.which);
-        InitJoystick(event.jdevice.which);
-        break;
-    }
-}
-
-void CloseSDLJoysticks() {
-    std::lock_guard<std::mutex> lock(joystick_map_mutex);
-    joystick_map.clear();
-}
-
-void PollLoop() {
-    if (SDL_Init(SDL_INIT_JOYSTICK) < 0) {
-        LOG_CRITICAL(Input, "SDL_Init(SDL_INIT_JOYSTICK) failed with: {}", SDL_GetError());
-        return;
-    }
-
-    SDL_Event event;
-    while (initialized) {
-        // Wait for 10 ms or until an event happens
-        if (SDL_WaitEventTimeout(&event, 10)) {
-            // Don't handle the event if we are configuring
-            if (polling) {
-                event_queue.Push(event);
-            } else {
-                HandleGameControllerEvent(event);
-            }
-        }
-    }
-    CloseSDLJoysticks();
-    SDL_QuitSubSystem(SDL_INIT_JOYSTICK);
-}
-
-class SDLButton final : public Input::ButtonDevice {
-public:
-    explicit SDLButton(std::shared_ptr<SDLJoystick> joystick_, int button_)
-        : joystick(std::move(joystick_)), button(button_) {}
-
-    bool GetStatus() const override {
-        return joystick->GetButton(button);
-    }
-
-private:
-    std::shared_ptr<SDLJoystick> joystick;
-    int button;
-};
-
-class SDLDirectionButton final : public Input::ButtonDevice {
-public:
-    explicit SDLDirectionButton(std::shared_ptr<SDLJoystick> joystick_, int hat_, Uint8 direction_)
-        : joystick(std::move(joystick_)), hat(hat_), direction(direction_) {}
-
-    bool GetStatus() const override {
-        return joystick->GetHatDirection(hat, direction);
-    }
-
-private:
-    std::shared_ptr<SDLJoystick> joystick;
-    int hat;
-    Uint8 direction;
-};
-
-class SDLAxisButton final : public Input::ButtonDevice {
-public:
-    explicit SDLAxisButton(std::shared_ptr<SDLJoystick> joystick_, int axis_, float threshold_,
-                           bool trigger_if_greater_)
-        : joystick(std::move(joystick_)), axis(axis_), threshold(threshold_),
-          trigger_if_greater(trigger_if_greater_) {}
-
-    bool GetStatus() const override {
-        float axis_value = joystick->GetAxis(axis);
-        if (trigger_if_greater)
-            return axis_value > threshold;
-        return axis_value < threshold;
-    }
-
-private:
-    std::shared_ptr<SDLJoystick> joystick;
-    int axis;
-    float threshold;
-    bool trigger_if_greater;
-};
-
-class SDLAnalog final : public Input::AnalogDevice {
-public:
-    SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_)
-        : joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_) {}
-
-    std::tuple<float, float> GetStatus() const override {
-        return joystick->GetAnalog(axis_x, axis_y);
-    }
-
-private:
-    std::shared_ptr<SDLJoystick> joystick;
-    int axis_x;
-    int axis_y;
-};
-
-/// A button device factory that creates button devices from SDL joystick
-class SDLButtonFactory final : public Input::Factory<Input::ButtonDevice> {
-public:
-    /**
-     * Creates a button device from a joystick button
-     * @param params contains parameters for creating the device:
-     *     - "guid": the guid of the joystick to bind
-     *     - "port": the nth joystick of the same type to bind
-     *     - "button"(optional): the index of the button to bind
-     *     - "hat"(optional): the index of the hat to bind as direction buttons
-     *     - "axis"(optional): the index of the axis to bind
-     *     - "direction"(only used for hat): the direction name of the hat to bind. Can be "up",
-     *         "down", "left" or "right"
-     *     - "threshold"(only used for axis): a float value in (-1.0, 1.0) which the button is
-     *         triggered if the axis value crosses
-     *     - "direction"(only used for axis): "+" means the button is triggered when the axis
-     * value is greater than the threshold; "-" means the button is triggered when the axis
-     * value is smaller than the threshold
-     */
-    std::unique_ptr<Input::ButtonDevice> Create(const Common::ParamPackage& params) override {
-        const std::string guid = params.Get("guid", "0");
-        const int port = params.Get("port", 0);
-
-        auto joystick = GetSDLJoystickByGUID(guid, port);
-
-        if (params.Has("hat")) {
-            const int hat = params.Get("hat", 0);
-            const std::string direction_name = params.Get("direction", "");
-            Uint8 direction;
-            if (direction_name == "up") {
-                direction = SDL_HAT_UP;
-            } else if (direction_name == "down") {
-                direction = SDL_HAT_DOWN;
-            } else if (direction_name == "left") {
-                direction = SDL_HAT_LEFT;
-            } else if (direction_name == "right") {
-                direction = SDL_HAT_RIGHT;
-            } else {
-                direction = 0;
-            }
-            // This is necessary so accessing GetHat with hat won't crash
-            joystick->SetHat(hat, SDL_HAT_CENTERED);
-            return std::make_unique<SDLDirectionButton>(joystick, hat, direction);
-        }
-
-        if (params.Has("axis")) {
-            const int axis = params.Get("axis", 0);
-            const float threshold = params.Get("threshold", 0.5f);
-            const std::string direction_name = params.Get("direction", "");
-            bool trigger_if_greater;
-            if (direction_name == "+") {
-                trigger_if_greater = true;
-            } else if (direction_name == "-") {
-                trigger_if_greater = false;
-            } else {
-                trigger_if_greater = true;
-                LOG_ERROR(Input, "Unknown direction '{}'", direction_name);
-            }
-            // This is necessary so accessing GetAxis with axis won't crash
-            joystick->SetAxis(axis, 0);
-            return std::make_unique<SDLAxisButton>(joystick, axis, threshold, trigger_if_greater);
-        }
-
-        const int button = params.Get("button", 0);
-        // This is necessary so accessing GetButton with button won't crash
-        joystick->SetButton(button, false);
-        return std::make_unique<SDLButton>(joystick, button);
-    }
-};
-
-/// An analog device factory that creates analog devices from SDL joystick
-class SDLAnalogFactory final : public Input::Factory<Input::AnalogDevice> {
-public:
-    /**
-     * Creates analog device from joystick axes
-     * @param params contains parameters for creating the device:
-     *     - "guid": the guid of the joystick to bind
-     *     - "port": the nth joystick of the same type
-     *     - "axis_x": the index of the axis to be bind as x-axis
-     *     - "axis_y": the index of the axis to be bind as y-axis
-     */
-    std::unique_ptr<Input::AnalogDevice> Create(const Common::ParamPackage& params) override {
-        const std::string guid = params.Get("guid", "0");
-        const int port = params.Get("port", 0);
-        const int axis_x = params.Get("axis_x", 0);
-        const int axis_y = params.Get("axis_y", 1);
-
-        auto joystick = GetSDLJoystickByGUID(guid, port);
-
-        // This is necessary so accessing GetAxis with axis_x and axis_y won't crash
-        joystick->SetAxis(axis_x, 0);
-        joystick->SetAxis(axis_y, 0);
-        return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y);
-    }
-};
-
-void Init() {
-    using namespace Input;
-    RegisterFactory<ButtonDevice>("sdl", std::make_shared<SDLButtonFactory>());
-    RegisterFactory<AnalogDevice>("sdl", std::make_shared<SDLAnalogFactory>());
-    polling = false;
-    initialized = true;
-}
-
-void Shutdown() {
-    if (initialized) {
-        using namespace Input;
-        UnregisterFactory<ButtonDevice>("sdl");
-        UnregisterFactory<AnalogDevice>("sdl");
-        initialized = false;
-    }
-}
-
-Common::ParamPackage SDLEventToButtonParamPackage(const SDL_Event& event) {
-    Common::ParamPackage params({{"engine", "sdl"}});
-    switch (event.type) {
-    case SDL_JOYAXISMOTION: {
-        auto joystick = GetSDLJoystickBySDLID(event.jaxis.which);
-        params.Set("port", joystick->GetPort());
-        params.Set("guid", joystick->GetGUID());
-        params.Set("axis", event.jaxis.axis);
-        if (event.jaxis.value > 0) {
-            params.Set("direction", "+");
-            params.Set("threshold", "0.5");
-        } else {
-            params.Set("direction", "-");
-            params.Set("threshold", "-0.5");
-        }
-        break;
-    }
-    case SDL_JOYBUTTONUP: {
-        auto joystick = GetSDLJoystickBySDLID(event.jbutton.which);
-        params.Set("port", joystick->GetPort());
-        params.Set("guid", joystick->GetGUID());
-        params.Set("button", event.jbutton.button);
-        break;
-    }
-    case SDL_JOYHATMOTION: {
-        auto joystick = GetSDLJoystickBySDLID(event.jhat.which);
-        params.Set("port", joystick->GetPort());
-        params.Set("guid", joystick->GetGUID());
-        params.Set("hat", event.jhat.hat);
-        switch (event.jhat.value) {
-        case SDL_HAT_UP:
-            params.Set("direction", "up");
-            break;
-        case SDL_HAT_DOWN:
-            params.Set("direction", "down");
-            break;
-        case SDL_HAT_LEFT:
-            params.Set("direction", "left");
-            break;
-        case SDL_HAT_RIGHT:
-            params.Set("direction", "right");
-            break;
-        default:
-            return {};
-        }
-        break;
-    }
-    }
-    return params;
-}
-
-namespace Polling {
-
-class SDLPoller : public InputCommon::Polling::DevicePoller {
-public:
-    void Start() override {
-        event_queue.Clear();
-        polling = true;
-    }
-
-    void Stop() override {
-        polling = false;
-    }
-};
-
-class SDLButtonPoller final : public SDLPoller {
-public:
-    Common::ParamPackage GetNextInput() override {
-        SDL_Event event;
-        while (event_queue.Pop(event)) {
-            switch (event.type) {
-            case SDL_JOYAXISMOTION:
-                if (std::abs(event.jaxis.value / 32767.0) < 0.5) {
-                    break;
-                }
-            case SDL_JOYBUTTONUP:
-            case SDL_JOYHATMOTION:
-                return SDLEventToButtonParamPackage(event);
-            }
-        }
-        return {};
-    }
-};
-
-class SDLAnalogPoller final : public SDLPoller {
-public:
-    void Start() override {
-        SDLPoller::Start();
-
-        // Reset stored axes
-        analog_xaxis = -1;
-        analog_yaxis = -1;
-        analog_axes_joystick = -1;
-    }
-
-    Common::ParamPackage GetNextInput() override {
-        SDL_Event event;
-        while (event_queue.Pop(event)) {
-            if (event.type != SDL_JOYAXISMOTION || std::abs(event.jaxis.value / 32767.0) < 0.5) {
-                continue;
-            }
-            // An analog device needs two axes, so we need to store the axis for later and wait for
-            // a second SDL event. The axes also must be from the same joystick.
-            int axis = event.jaxis.axis;
-            if (analog_xaxis == -1) {
-                analog_xaxis = axis;
-                analog_axes_joystick = event.jaxis.which;
-            } else if (analog_yaxis == -1 && analog_xaxis != axis &&
-                       analog_axes_joystick == event.jaxis.which) {
-                analog_yaxis = axis;
-            }
-        }
-        Common::ParamPackage params;
-        if (analog_xaxis != -1 && analog_yaxis != -1) {
-            auto joystick = GetSDLJoystickBySDLID(event.jaxis.which);
-            params.Set("engine", "sdl");
-            params.Set("port", joystick->GetPort());
-            params.Set("guid", joystick->GetGUID());
-            params.Set("axis_x", analog_xaxis);
-            params.Set("axis_y", analog_yaxis);
-            analog_xaxis = -1;
-            analog_yaxis = -1;
-            analog_axes_joystick = -1;
-            return params;
-        }
-        return params;
-    }
-
-private:
-    int analog_xaxis = -1;
-    int analog_yaxis = -1;
-    SDL_JoystickID analog_axes_joystick = -1;
-};
-
-std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
-    InputCommon::Polling::DeviceType type) {
-    std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> pollers;
-    switch (type) {
-    case InputCommon::Polling::DeviceType::Analog:
-        pollers.push_back(std::make_unique<SDLAnalogPoller>());
-        break;
-    case InputCommon::Polling::DeviceType::Button:
-        pollers.push_back(std::make_unique<SDLButtonPoller>());
-        break;
-    }
-    return pollers;
-}
-} // namespace Polling
-} // namespace SDL
-} // namespace InputCommon
+} // namespace InputCommon::SDL
--- a/src/input_common/sdl/sdl.h
+++ b/src/input_common/sdl/sdl.h
@@ -1,4 +1,4 @@
-// Copyright 2017 Citra Emulator Project
+// Copyright 2018 Citra Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

@@ -7,45 +7,36 @@
 #include <memory>
 #include <vector>
 #include "core/frontend/input.h"
+#include "input_common/main.h"

 union SDL_Event;
+
 namespace Common {
 class ParamPackage;
-}
-namespace InputCommon {
-namespace Polling {
+} // namespace Common
+
+namespace InputCommon::Polling {
 class DevicePoller;
 enum class DeviceType;
-} // namespace Polling
-} // namespace InputCommon
+} // namespace InputCommon::Polling

-namespace InputCommon {
-namespace SDL {
+namespace InputCommon::SDL {

-/// Initializes and registers SDL device factories
-void Init();
+class State {
+public:
+    /// Unregisters SDL device factories and shuts them down.
+    virtual ~State() = default;

-/// Unresisters SDL device factories and shut them down.
-void Shutdown();
+    virtual std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
+        InputCommon::Polling::DeviceType type) = 0;
+};

-/// Needs to be called before SDL_QuitSubSystem.
-void CloseSDLJoysticks();
+class NullState : public State { // Stand-in returned by Init() when built without HAVE_SDL2
+public:
+    std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
+        InputCommon::Polling::DeviceType type) override { return {}; } // no SDL, so no pollers
+};

-/// Handle SDL_Events for joysticks from SDL_PollEvent
-void HandleGameControllerEvent(const SDL_Event& event);
+std::unique_ptr<State> Init();

-/// A Loop that calls HandleGameControllerEvent until Shutdown is called
-void PollLoop();
-
-/// Creates a ParamPackage from an SDL_Event that can directly be used to create a ButtonDevice
-Common::ParamPackage SDLEventToButtonParamPackage(const SDL_Event& event);
-
-namespace Polling {
-
-/// Get all DevicePoller that use the SDL backend for a specific device type
-std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
-    InputCommon::Polling::DeviceType type);
-
-} // namespace Polling
-} // namespace SDL
-} // namespace InputCommon
+} // namespace InputCommon::SDL
--- a/src/input_common/sdl/sdl_impl.cpp
+++ b/src/input_common/sdl/sdl_impl.cpp
@@ -0,0 +1,669 @@
+// Copyright 2018 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <atomic>
+#include <cmath>
+#include <functional>
+#include <iterator>
+#include <mutex>
+#include <string>
+#include <thread>
+#include <tuple>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+#include <SDL.h>
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/math_util.h"
+#include "common/param_package.h"
+#include "common/threadsafe_queue.h"
+#include "core/frontend/input.h"
+#include "input_common/sdl/sdl_impl.h"
+
+namespace InputCommon {
+
+namespace SDL {
+
+static std::string GetGUID(SDL_Joystick* joystick) {
+    SDL_JoystickGUID guid = SDL_JoystickGetGUID(joystick);
+    char guid_str[33];
+    SDL_JoystickGetGUIDString(guid, guid_str, sizeof(guid_str));
+    return guid_str;
+}
+
+/// Creates a ParamPackage from an SDL_Event that can directly be used to create a ButtonDevice
+static Common::ParamPackage SDLEventToButtonParamPackage(SDLState& state, const SDL_Event& event);
+
+static int SDLEventWatcher(void* userdata, SDL_Event* event) {
+    auto* const sdl_state = static_cast<SDLState*>(userdata); // userdata arrives as a void*
+    // While configuring, queue the event instead of applying it to the joystick state
+    if (sdl_state->polling) {
+        sdl_state->event_queue.Push(*event);
+    } else {
+        sdl_state->HandleGameControllerEvent(*event);
+    }
+    return 0;
+}
+
+class SDLJoystick {
+public:
+    SDLJoystick(std::string guid_, int port_, SDL_Joystick* joystick,
+                decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose)
+        : guid{std::move(guid_)}, port{port_}, sdl_joystick{joystick, deleter} {}
+
+    void SetButton(int button, bool value) {
+        std::lock_guard<std::mutex> lock(mutex);
+        state.buttons[button] = value;
+    }
+
+    bool GetButton(int button) const {
+        std::lock_guard<std::mutex> lock(mutex);
+        return state.buttons.at(button);
+    }
+
+    void SetAxis(int axis, Sint16 value) {
+        std::lock_guard<std::mutex> lock(mutex);
+        state.axes[axis] = value;
+    }
+
+    float GetAxis(int axis) const {
+        std::lock_guard<std::mutex> lock(mutex);
+        return state.axes.at(axis) / 32767.0f;
+    }
+
+    std::tuple<float, float> GetAnalog(int axis_x, int axis_y) const {
+        float x = GetAxis(axis_x);
+        float y = GetAxis(axis_y);
+        y = -y; // 3DS uses an y-axis inverse from SDL
+
+        // Make sure the coordinates are in the unit circle,
+        // otherwise normalize it.
+        float r = x * x + y * y;
+        if (r > 1.0f) {
+            r = std::sqrt(r);
+            x /= r;
+            y /= r;
+        }
+
+        return std::make_tuple(x, y);
+    }
+
+    void SetHat(int hat, Uint8 direction) {
+        std::lock_guard<std::mutex> lock(mutex);
+        state.hats[hat] = direction;
+    }
+
+    bool GetHatDirection(int hat, Uint8 direction) const {
+        std::lock_guard<std::mutex> lock(mutex);
+        return (state.hats.at(hat) & direction) != 0;
+    }
+    /**
+     * The guid of the joystick
+     */
+    const std::string& GetGUID() const {
+        return guid;
+    }
+
+    /**
+     * The number of joysticks of the same type that were connected before this joystick
+     */
+    int GetPort() const {
+        return port;
+    }
+
+    SDL_Joystick* GetSDLJoystick() const {
+        return sdl_joystick.get();
+    }
+
+    void SetSDLJoystick(SDL_Joystick* joystick,
+                        decltype(&SDL_JoystickClose) deleter = &SDL_JoystickClose) {
+        sdl_joystick =
+            std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)>(joystick, deleter);
+    }
+
+private:
+    struct State {
+        std::unordered_map<int, bool> buttons;
+        std::unordered_map<int, Sint16> axes;
+        std::unordered_map<int, Uint8> hats;
+    } state;
+    std::string guid;
+    int port;
+    std::unique_ptr<SDL_Joystick, decltype(&SDL_JoystickClose)> sdl_joystick;
+    mutable std::mutex mutex;
+};
+
+/**
+ * Get the nth joystick with the corresponding GUID, creating unmapped placeholder entries for
+ * every port up to and including the requested one. A negative port is treated as port 0.
+ */
+std::shared_ptr<SDLJoystick> SDLState::GetSDLJoystickByGUID(const std::string& guid, int port) {
+    // A negative port would wrap to a huge unsigned value in the size comparison below
+    const std::size_t index = port < 0 ? 0 : static_cast<std::size_t>(port);
+    std::lock_guard<std::mutex> lock(joystick_map_mutex);
+    // operator[] creates the (empty) list for a GUID that has not been seen yet
+    auto& joystick_guid_list = joystick_map[guid];
+    while (joystick_guid_list.size() <= index) {
+        // Placeholder entry that has no SDL_Joystick attached yet
+        joystick_guid_list.emplace_back(std::make_shared<SDLJoystick>(
+            guid, static_cast<int>(joystick_guid_list.size()), nullptr, [](SDL_Joystick*) {}));
+    }
+    return joystick_guid_list[index];
+}
+
+/**
+ * Check how many identical joysticks (by guid) were connected before the one with sdl_id and so tie
+ * it to a SDLJoystick with the same guid and that port
+ */
+std::shared_ptr<SDLJoystick> SDLState::GetSDLJoystickBySDLID(SDL_JoystickID sdl_id) {
+    auto sdl_joystick = SDL_JoystickFromInstanceID(sdl_id);
+    const std::string guid = GetGUID(sdl_joystick);
+    std::lock_guard<std::mutex> lock(joystick_map_mutex);
+    auto map_it = joystick_map.find(guid);
+    if (map_it != joystick_map.end()) {
+        auto vec_it = std::find_if(map_it->second.begin(), map_it->second.end(),
+                                   [&sdl_joystick](const std::shared_ptr<SDLJoystick>& joystick) {
+                                       return sdl_joystick == joystick->GetSDLJoystick();
+                                   });
+        if (vec_it != map_it->second.end()) {
+            // This is the common case: There is already an existing SDL_Joystick mapped to a
+            // SDLJoystick. Return the SDLJoystick
+            return *vec_it;
+        }
+        // Search for a SDLJoystick without a mapped SDL_Joystick...
+        auto nullptr_it = std::find_if(map_it->second.begin(), map_it->second.end(),
+                                       [](const std::shared_ptr<SDLJoystick>& joystick) {
+                                           return !joystick->GetSDLJoystick();
+                                       });
+        if (nullptr_it != map_it->second.end()) {
+            // ... and map it
+            (*nullptr_it)->SetSDLJoystick(sdl_joystick);
+            return *nullptr_it;
+        }
+        // There is no SDLJoystick without a mapped SDL_Joystick
+        // Create a new SDLJoystick
+        auto joystick = std::make_shared<SDLJoystick>(guid, map_it->second.size(), sdl_joystick);
+        return map_it->second.emplace_back(std::move(joystick));
+    }
+    auto joystick = std::make_shared<SDLJoystick>(guid, 0, sdl_joystick);
+    return joystick_map[guid].emplace_back(std::move(joystick));
+}
+
+/// Opens the SDL joystick at the given device index and binds it to a virtual SDLJoystick with
+/// the same guid, reusing an entry that has no SDL handle before adding a new port.
+void SDLState::InitJoystick(int joystick_index) {
+    SDL_Joystick* sdl_joystick = SDL_JoystickOpen(joystick_index);
+    if (sdl_joystick == nullptr) {
+        LOG_ERROR(Input, "failed to open joystick {}", joystick_index);
+        return;
+    }
+    const std::string guid = GetGUID(sdl_joystick);
+    std::lock_guard<std::mutex> lock(joystick_map_mutex);
+
+    const auto map_it = joystick_map.find(guid);
+    if (map_it == joystick_map.end()) {
+        // First joystick with this guid: it becomes port 0 of a new list
+        joystick_map[guid].emplace_back(std::make_shared<SDLJoystick>(guid, 0, sdl_joystick));
+        return;
+    }
+
+    auto& same_guid = map_it->second;
+    const auto free_slot =
+        std::find_if(same_guid.begin(), same_guid.end(),
+                     [](const auto& entry) { return entry->GetSDLJoystick() == nullptr; });
+    if (free_slot != same_guid.end()) {
+        // An entry without an attached SDL_Joystick exists, so attach the new handle to it
+        (*free_slot)->SetSDLJoystick(sdl_joystick);
+        return;
+    }
+
+    // All known entries are in use; the new joystick takes the next free port
+    same_guid.emplace_back(std::make_shared<SDLJoystick>(guid, same_guid.size(), sdl_joystick));
+}
+
+/// Detaches the given SDL handle from the virtual SDLJoystick it is bound to. The virtual joystick
+/// itself stays in the map. Does nothing when the handle is null or not bound to any SDLJoystick.
+void SDLState::CloseJoystick(SDL_Joystick* sdl_joystick) {
+    if (sdl_joystick == nullptr) {
+        // The caller passes the result of SDL_JoystickFromInstanceID, which can be null
+        return;
+    }
+    const std::string guid = GetGUID(sdl_joystick);
+    std::shared_ptr<SDLJoystick> joystick;
+    {
+        std::lock_guard<std::mutex> lock(joystick_map_mutex);
+        // Use find() rather than operator[] so that an unknown guid does not insert an empty list
+        const auto map_it = joystick_map.find(guid);
+        if (map_it == joystick_map.end()) {
+            LOG_DEBUG(Input, "no joystick with guid {} is registered", guid);
+            return;
+        }
+        const auto& joystick_guid_list = map_it->second;
+        const auto joystick_it =
+            std::find_if(joystick_guid_list.begin(), joystick_guid_list.end(),
+                         [sdl_joystick](const std::shared_ptr<SDLJoystick>& candidate) {
+                             return candidate->GetSDLJoystick() == sdl_joystick;
+                         });
+        if (joystick_it == joystick_guid_list.end()) {
+            // Dereferencing end() would be undefined behaviour
+            LOG_DEBUG(Input, "joystick with guid {} is not bound to any port", guid);
+            return;
+        }
+        joystick = *joystick_it;
+    }
+    // Destruct SDL_Joystick outside the lock guard because SDL can internally call the event
+    // callback, which locks the mutex again
+    joystick->SetSDLJoystick(nullptr, [](SDL_Joystick*) {});
+}
+
+/// Applies a joystick related SDL event: input events update the state of the matching
+/// SDLJoystick, device events open or close the underlying SDL joystick.
+void SDLState::HandleGameControllerEvent(const SDL_Event& event) {
+    switch (event.type) {
+    case SDL_JOYBUTTONUP:
+    case SDL_JOYBUTTONDOWN:
+        // Both events share the jbutton payload; only the resulting pressed state differs
+        if (const auto pad = GetSDLJoystickBySDLID(event.jbutton.which)) {
+            pad->SetButton(event.jbutton.button, event.type == SDL_JOYBUTTONDOWN);
+        }
+        break;
+    case SDL_JOYHATMOTION:
+        if (const auto pad = GetSDLJoystickBySDLID(event.jhat.which)) {
+            pad->SetHat(event.jhat.hat, event.jhat.value);
+        }
+        break;
+    case SDL_JOYAXISMOTION:
+        if (const auto pad = GetSDLJoystickBySDLID(event.jaxis.which)) {
+            pad->SetAxis(event.jaxis.axis, event.jaxis.value);
+        }
+        break;
+    case SDL_JOYDEVICEREMOVED:
+        // For removal events `which` is an instance id
+        LOG_DEBUG(Input, "Controller removed with Instance_ID {}", event.jdevice.which);
+        CloseJoystick(SDL_JoystickFromInstanceID(event.jdevice.which));
+        break;
+    case SDL_JOYDEVICEADDED:
+        // For added events `which` is a device index
+        LOG_DEBUG(Input, "Controller connected with device index {}", event.jdevice.which);
+        InitJoystick(event.jdevice.which);
+        break;
+    }
+}
+
+/// Drops every virtual joystick held in the guid map.
+void SDLState::CloseJoysticks() {
+    std::lock_guard<std::mutex> guard{joystick_map_mutex};
+    joystick_map.clear();
+}
+
+/// Button device that reports the state of one joystick button.
+class SDLButton final : public Input::ButtonDevice {
+public:
+    explicit SDLButton(std::shared_ptr<SDLJoystick> joystick_, int button_)
+        : joystick{std::move(joystick_)}, button{button_} {}
+
+    /// @returns the state stored in the joystick for the bound button index
+    bool GetStatus() const override {
+        return joystick->GetButton(button);
+    }
+
+private:
+    std::shared_ptr<SDLJoystick> joystick; ///< Joystick the button belongs to
+    int button;                            ///< Index of the button on that joystick
+};
+
+/// Button device that reports whether a joystick hat is pushed in one given direction.
+class SDLDirectionButton final : public Input::ButtonDevice {
+public:
+    explicit SDLDirectionButton(std::shared_ptr<SDLJoystick> joystick_, int hat_, Uint8 direction_)
+        : joystick{std::move(joystick_)}, hat{hat_}, direction{direction_} {}
+
+    /// @returns the joystick's answer for the bound hat index and direction value
+    bool GetStatus() const override {
+        return joystick->GetHatDirection(hat, direction);
+    }
+
+private:
+    std::shared_ptr<SDLJoystick> joystick; ///< Joystick the hat belongs to
+    int hat;                               ///< Index of the hat on that joystick
+    Uint8 direction;                       ///< SDL_HAT_* value to test for
+};
+
+/// Button device that turns a joystick axis into a button by comparing it against a threshold.
+class SDLAxisButton final : public Input::ButtonDevice {
+public:
+    explicit SDLAxisButton(std::shared_ptr<SDLJoystick> joystick_, int axis_, float threshold_,
+                           bool trigger_if_greater_)
+        : joystick{std::move(joystick_)}, axis{axis_}, threshold{threshold_},
+          trigger_if_greater{trigger_if_greater_} {}
+
+    /// @returns true while the axis value is strictly beyond the threshold on the configured side
+    bool GetStatus() const override {
+        const float axis_value = joystick->GetAxis(axis);
+        return trigger_if_greater ? axis_value > threshold : axis_value < threshold;
+    }
+
+private:
+    std::shared_ptr<SDLJoystick> joystick; ///< Joystick the axis belongs to
+    int axis;                              ///< Index of the axis on that joystick
+    float threshold;                       ///< Value the axis has to cross
+    bool trigger_if_greater;               ///< true: pressed above the threshold, false: below
+};
+
+/// Analog device built from two joystick axes, with a radial deadzone applied.
+class SDLAnalog final : public Input::AnalogDevice {
+public:
+    SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_, float deadzone_)
+        : joystick{std::move(joystick_)}, axis_x{axis_x_}, axis_y{axis_y_}, deadzone{deadzone_} {}
+
+    /// @returns (0, 0) unless the stick magnitude exceeds the deadzone; otherwise the stick vector
+    /// with its magnitude r replaced by (r - deadzone) / (1 - deadzone)
+    std::tuple<float, float> GetStatus() const override {
+        const auto [x, y] = joystick->GetAnalog(axis_x, axis_y);
+        const float r = std::sqrt((x * x) + (y * y));
+        // Written as a negated '>' so that the zero result is returned exactly when the original
+        // comparison fails
+        if (!(r > deadzone)) {
+            return std::make_tuple<float, float>(0.0f, 0.0f);
+        }
+        return std::make_tuple(x / r * (r - deadzone) / (1 - deadzone),
+                               y / r * (r - deadzone) / (1 - deadzone));
+    }
+
+private:
+    std::shared_ptr<SDLJoystick> joystick; ///< Joystick both axes belong to
+    const int axis_x;                      ///< Index of the axis used as x
+    const int axis_y;                      ///< Index of the axis used as y
+    const float deadzone;                  ///< Magnitude at or below which the output is zero
+};
+
+/// Factory producing button devices that are backed by an SDL joystick
+class SDLButtonFactory final : public Input::Factory<Input::ButtonDevice> {
+public:
+    explicit SDLButtonFactory(SDLState& state_) : state(state_) {}
+
+    /**
+     * Creates a button device from a joystick button, hat direction or axis. "hat" takes
+     * precedence over "axis", which takes precedence over "button".
+     * @param params contains parameters for creating the device:
+     *     - "guid": the guid of the joystick to bind
+     *     - "port": the nth joystick of the same type to bind
+     *     - "button"(optional): the index of the button to bind
+     *     - "hat"(optional): the index of the hat to bind as direction buttons
+     *     - "axis"(optional): the index of the axis to bind
+     *     - "direction"(only used for hat): the direction name of the hat to bind. Can be "up",
+     *         "down", "left" or "right"
+     *     - "threshold"(only used for axis): a float value in (-1.0, 1.0) which the button is
+     *         triggered if the axis value crosses
+     *     - "direction"(only used for axis): "+" means the button is triggered when the axis
+     *         value is greater than the threshold; "-" means the button is triggered when the
+     *         axis value is smaller than the threshold
+     */
+    std::unique_ptr<Input::ButtonDevice> Create(const Common::ParamPackage& params) override {
+        const std::string guid = params.Get("guid", "0");
+        const int port = params.Get("port", 0);
+        auto joystick = state.GetSDLJoystickByGUID(guid, port);
+
+        if (params.Has("hat")) {
+            const int hat = params.Get("hat", 0);
+            const std::string direction_name = params.Get("direction", "");
+            // Names other than the four below leave the direction at 0
+            Uint8 direction = 0;
+            if (direction_name == "up") {
+                direction = SDL_HAT_UP;
+            } else if (direction_name == "down") {
+                direction = SDL_HAT_DOWN;
+            } else if (direction_name == "left") {
+                direction = SDL_HAT_LEFT;
+            } else if (direction_name == "right") {
+                direction = SDL_HAT_RIGHT;
+            }
+            // This is necessary so accessing GetHat with hat won't crash
+            joystick->SetHat(hat, SDL_HAT_CENTERED);
+            return std::make_unique<SDLDirectionButton>(joystick, hat, direction);
+        }
+
+        if (params.Has("axis")) {
+            const int axis = params.Get("axis", 0);
+            const float threshold = params.Get("threshold", 0.5f);
+            const std::string direction_name = params.Get("direction", "");
+            // "+" and anything unrecognised trigger above the threshold, only "-" triggers below
+            bool trigger_if_greater = true;
+            if (direction_name == "-") {
+                trigger_if_greater = false;
+            } else if (direction_name != "+") {
+                LOG_ERROR(Input, "Unknown direction {}", direction_name);
+            }
+            // This is necessary so accessing GetAxis with axis won't crash
+            joystick->SetAxis(axis, 0);
+            return std::make_unique<SDLAxisButton>(joystick, axis, threshold, trigger_if_greater);
+        }
+
+        // Neither "hat" nor "axis" was given: bind a plain button
+        const int button = params.Get("button", 0);
+        // This is necessary so accessing GetButton with button won't crash
+        joystick->SetButton(button, false);
+        return std::make_unique<SDLButton>(joystick, button);
+    }
+
+private:
+    SDLState& state;
+};
+
+/// Factory producing analog devices that are backed by two axes of an SDL joystick
+class SDLAnalogFactory final : public Input::Factory<Input::AnalogDevice> {
+public:
+    explicit SDLAnalogFactory(SDLState& state_) : state(state_) {}
+
+    /**
+     * Creates an analog device from joystick axes
+     * @param params contains parameters for creating the device:
+     *     - "guid": the guid of the joystick to bind
+     *     - "port": the nth joystick of the same type
+     *     - "axis_x": the index of the axis to bind as x-axis (defaults to 0)
+     *     - "axis_y": the index of the axis to bind as y-axis (defaults to 1)
+     *     - "deadzone": the radial deadzone, clamped to [0.0, 0.99] (defaults to 0.0)
+     */
+    std::unique_ptr<Input::AnalogDevice> Create(const Common::ParamPackage& params) override {
+        const std::string guid = params.Get("guid", "0");
+        const int port = params.Get("port", 0);
+        const int axis_x = params.Get("axis_x", 0);
+        const int axis_y = params.Get("axis_y", 1);
+        const float deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, .99f);
+
+        auto joystick = state.GetSDLJoystickByGUID(guid, port);
+
+        // This is necessary so accessing GetAxis with axis_x and axis_y won't crash
+        for (const int axis : {axis_x, axis_y}) {
+            joystick->SetAxis(axis, 0);
+        }
+        return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y, deadzone);
+    }
+
+private:
+    SDLState& state;
+};
+
+/// Registers the "sdl" button and analog factories, initializes the SDL joystick subsystem if
+/// nobody else has done so, installs the event watcher and opens the joysticks that are already
+/// connected. When this object had to initialize SDL itself it also starts a thread that pumps
+/// SDL events.
+SDLState::SDLState() {
+    using namespace Input;
+    RegisterFactory<ButtonDevice>("sdl", std::make_shared<SDLButtonFactory>(*this));
+    RegisterFactory<AnalogDevice>("sdl", std::make_shared<SDLAnalogFactory>(*this));
+
+    // If the frontend is going to manage the event loop, then we don't start one here
+    start_thread = !SDL_WasInit(SDL_INIT_JOYSTICK);
+    if (start_thread && SDL_Init(SDL_INIT_JOYSTICK) < 0) {
+        LOG_CRITICAL(Input, "SDL_Init(SDL_INIT_JOYSTICK) failed with: {}", SDL_GetError());
+        return;
+    }
+    if (SDL_SetHint(SDL_HINT_JOYSTICK_ALLOW_BACKGROUND_EVENTS, "1") == SDL_FALSE) {
+        // The format string needs a placeholder, otherwise the SDL error text is never printed
+        LOG_ERROR(Input, "Failed to set Hint for background events: {}", SDL_GetError());
+    }
+
+    SDL_AddEventWatch(&SDLEventWatcher, this);
+
+    initialized = true;
+    if (start_thread) {
+        // The thread only touches `initialized`; the destructor joins it before members die
+        poll_thread = std::thread([this] {
+            using namespace std::chrono_literals;
+            while (initialized) {
+                SDL_PumpEvents();
+                std::this_thread::sleep_for(10ms);
+            }
+        });
+    }
+    // Because the events for joystick connection happen before we have our event watcher added,
+    // we can just open all the joysticks right here
+    for (int i = 0; i < SDL_NumJoysticks(); ++i) {
+        InitJoystick(i);
+    }
+}
+
+/// Unregisters the "sdl" factories, releases all joysticks, removes the event watcher and, when
+/// this object initialized the SDL joystick subsystem itself, stops the poll thread and shuts the
+/// subsystem down again.
+SDLState::~SDLState() {
+    using namespace Input;
+    UnregisterFactory<ButtonDevice>("sdl");
+    UnregisterFactory<AnalogDevice>("sdl");
+
+    CloseJoysticks();
+    SDL_DelEventWatch(&SDLEventWatcher, this);
+
+    initialized = false;
+    if (start_thread) {
+        // The constructor returns before starting the thread when SDL_Init fails; joining a
+        // thread that was never started throws, which would terminate the program from here
+        if (poll_thread.joinable()) {
+            poll_thread.join();
+        }
+        SDL_QuitSubSystem(SDL_INIT_JOYSTICK);
+    }
+}
+
+/// Builds the "sdl" button parameters that describe the given axis, button or hat event. Hat
+/// events that are not exactly up, down, left or right yield an empty package; other event types
+/// yield a package that only names the engine.
+Common::ParamPackage SDLEventToButtonParamPackage(SDLState& state, const SDL_Event& event) {
+    Common::ParamPackage params({{"engine", "sdl"}});
+
+    // Stores port and guid of the joystick that produced the event
+    const auto set_source = [&state, &params](SDL_JoystickID instance_id) {
+        const auto joystick = state.GetSDLJoystickBySDLID(instance_id);
+        params.Set("port", joystick->GetPort());
+        params.Set("guid", joystick->GetGUID());
+    };
+
+    switch (event.type) {
+    case SDL_JOYAXISMOTION: {
+        set_source(event.jaxis.which);
+        params.Set("axis", event.jaxis.axis);
+        // The sign of the current axis value picks the side of the threshold
+        const bool positive = event.jaxis.value > 0;
+        params.Set("direction", positive ? "+" : "-");
+        params.Set("threshold", positive ? "0.5" : "-0.5");
+        break;
+    }
+    case SDL_JOYBUTTONUP:
+        set_source(event.jbutton.which);
+        params.Set("button", event.jbutton.button);
+        break;
+    case SDL_JOYHATMOTION: {
+        set_source(event.jhat.which);
+        params.Set("hat", event.jhat.hat);
+        const char* direction_name = nullptr;
+        switch (event.jhat.value) {
+        case SDL_HAT_UP:
+            direction_name = "up";
+            break;
+        case SDL_HAT_DOWN:
+            direction_name = "down";
+            break;
+        case SDL_HAT_LEFT:
+            direction_name = "left";
+            break;
+        case SDL_HAT_RIGHT:
+            direction_name = "right";
+            break;
+        default:
+            // Centered or diagonal positions cannot be bound as a single direction
+            return {};
+        }
+        params.Set("direction", direction_name);
+        break;
+    }
+    }
+    return params;
+}
+
+namespace Polling {
+
+/// Common base of the SDL pollers; starting and stopping toggles SDLState::polling
+class SDLPoller : public InputCommon::Polling::DevicePoller {
+public:
+    explicit SDLPoller(SDLState& state_) : state(state_) {}
+
+    /// Discards events that are still queued, then turns polling on
+    void Start() override {
+        state.event_queue.Clear();
+        state.polling.store(true);
+    }
+
+    /// Turns polling off again
+    void Stop() override {
+        state.polling.store(false);
+    }
+
+protected:
+    SDLState& state;
+};
+
+/// Poller that turns the next suitable queued SDL event into button parameters
+class SDLButtonPoller final : public SDLPoller {
+public:
+    explicit SDLButtonPoller(SDLState& state_) : SDLPoller(state_) {}
+
+    /// Drains the event queue until an event usable as a button binding is found.
+    /// @returns the parameters for that event, or an empty package when the queue ran dry
+    Common::ParamPackage GetNextInput() override {
+        SDL_Event event;
+        while (state.event_queue.Pop(event)) {
+            switch (event.type) {
+            case SDL_JOYAXISMOTION:
+                // Ignore axes that are deflected by less than half of their range
+                if (std::abs(event.jaxis.value / 32767.0) < 0.5) {
+                    break;
+                }
+                // A sufficiently deflected axis is bound like a button or hat event
+                [[fallthrough]];
+            case SDL_JOYBUTTONUP:
+            case SDL_JOYHATMOTION:
+                return SDLEventToButtonParamPackage(state, event);
+            }
+        }
+        return {};
+    }
+};
+
+/// Poller that collects two distinct axes of one joystick and reports them as an analog device
+class SDLAnalogPoller final : public SDLPoller {
+public:
+    explicit SDLAnalogPoller(SDLState& state_) : SDLPoller(state_) {}
+
+    void Start() override {
+        SDLPoller::Start();
+
+        // Reset stored axes
+        analog_xaxis = -1;
+        analog_yaxis = -1;
+        analog_axes_joystick = -1;
+    }
+
+    /// Drains the event queue and remembers the first two different, sufficiently deflected axes
+    /// that belong to the same joystick.
+    /// @returns the analog parameters once both axes are known, otherwise an empty package
+    Common::ParamPackage GetNextInput() override {
+        SDL_Event event;
+        while (state.event_queue.Pop(event)) {
+            if (event.type != SDL_JOYAXISMOTION || std::abs(event.jaxis.value / 32767.0) < 0.5) {
+                continue;
+            }
+            // An analog device needs two axes, so we need to store the axis for later and wait for
+            // a second SDL event. The axes also must be from the same joystick.
+            const int axis = event.jaxis.axis;
+            if (analog_xaxis == -1) {
+                analog_xaxis = axis;
+                analog_axes_joystick = event.jaxis.which;
+            } else if (analog_yaxis == -1 && analog_xaxis != axis &&
+                       analog_axes_joystick == event.jaxis.which) {
+                analog_yaxis = axis;
+            }
+        }
+        Common::ParamPackage params;
+        if (analog_xaxis != -1 && analog_yaxis != -1) {
+            // Use the joystick id stored together with the axes. `event` holds whatever was popped
+            // last, which may be a later event from a different joystick or of a different type.
+            auto joystick = state.GetSDLJoystickBySDLID(analog_axes_joystick);
+            params.Set("engine", "sdl");
+            params.Set("port", joystick->GetPort());
+            params.Set("guid", joystick->GetGUID());
+            params.Set("axis_x", analog_xaxis);
+            params.Set("axis_y", analog_yaxis);
+            analog_xaxis = -1;
+            analog_yaxis = -1;
+            analog_axes_joystick = -1;
+            return params;
+        }
+        return params;
+    }
+
+private:
+    int analog_xaxis = -1;                    ///< First axis seen, -1 while unset
+    int analog_yaxis = -1;                    ///< Second axis seen, -1 while unset
+    SDL_JoystickID analog_axes_joystick = -1; ///< Instance id of the joystick owning the axes
+};
+} // namespace Polling
+
+/// Creates the poller matching the requested device type. The returned list is empty for device
+/// types that have no SDL poller.
+std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> SDLState::GetPollers(
+    InputCommon::Polling::DeviceType type) {
+    std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> pollers;
+    switch (type) {
+    case InputCommon::Polling::DeviceType::Analog:
+        pollers.emplace_back(std::make_unique<Polling::SDLAnalogPoller>(*this));
+        break;
+    case InputCommon::Polling::DeviceType::Button:
+        pollers.emplace_back(std::make_unique<Polling::SDLButtonPoller>(*this));
+        break;
+    }
+    // The return has to follow the switch: placed after the last break it was unreachable and the
+    // function ran off its end without returning a value
+    return pollers;
+}
+
+} // namespace SDL
+} // namespace InputCommon
--- a/src/input_common/sdl/sdl_impl.h
+++ b/src/input_common/sdl/sdl_impl.h
@@ -0,0 +1,64 @@
+// Copyright 2018 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+#include <memory>
+#include <thread>
+#include "common/threadsafe_queue.h"
+#include "input_common/sdl/sdl.h"
+
+union SDL_Event;
+using SDL_Joystick = struct _SDL_Joystick;
+using SDL_JoystickID = s32;
+
+namespace InputCommon::SDL {
+
+class SDLJoystick;
+class SDLButtonFactory;
+class SDLAnalogFactory;
+
+/// SDL joystick backend: owns the virtual joysticks, registers the "sdl" input device factories
+/// and hands out pollers used while configuring bindings.
+class SDLState : public State {
+public:
+    /// Initializes and registers SDL device factories
+    SDLState();
+
+    /// Unregisters the SDL device factories and shuts them down.
+    ~SDLState() override;
+
+    /// Handle SDL_Events for joysticks from SDL_PollEvent
+    void HandleGameControllerEvent(const SDL_Event& event);
+
+    /// Returns the virtual joystick bound to the SDL joystick with the given instance id
+    std::shared_ptr<SDLJoystick> GetSDLJoystickBySDLID(SDL_JoystickID sdl_id);
+    /// Returns the virtual joystick for the given guid and port (the nth joystick with that guid)
+    std::shared_ptr<SDLJoystick> GetSDLJoystickByGUID(const std::string& guid, int port);
+
+    /// Get all DevicePoller that use the SDL backend for a specific device type
+    std::vector<std::unique_ptr<InputCommon::Polling::DevicePoller>> GetPollers(
+        InputCommon::Polling::DeviceType type) override;
+
+    /// Used by the Pollers during config
+    std::atomic<bool> polling = false;
+    /// Events consumed by the Pollers; cleared whenever a poller is started
+    Common::SPSCQueue<SDL_Event> event_queue;
+
+private:
+    /// Opens the SDL joystick with the given device index and binds it to a virtual joystick
+    void InitJoystick(int joystick_index);
+    /// Detaches the given SDL joystick from the virtual joystick it is bound to
+    void CloseJoystick(SDL_Joystick* sdl_joystick);
+
+    /// Needs to be called before SDL_QuitSubSystem.
+    void CloseJoysticks();
+
+    /// Map of GUID to the list of corresponding virtual Joysticks
+    std::unordered_map<std::string, std::vector<std::shared_ptr<SDLJoystick>>> joystick_map;
+    /// Guards joystick_map
+    std::mutex joystick_map_mutex;
+
+    // NOTE(review): neither member is assigned in the visible part of sdl_impl.cpp, where the
+    // factories are passed straight to RegisterFactory - confirm these are still needed
+    std::shared_ptr<SDLButtonFactory> button_factory;
+    std::shared_ptr<SDLAnalogFactory> analog_factory;
+
+    /// True when this object initialized the SDL joystick subsystem itself and therefore runs
+    /// its own event pumping thread
+    bool start_thread = false;
+    /// Keeps the event pumping thread running while true
+    std::atomic<bool> initialized = false;
+
+    /// Thread that periodically pumps SDL events; only started when start_thread is true
+    std::thread poll_thread;
+};
+} // namespace InputCommon::SDL
--- a/src/tests/core/arm/arm_test_common.cpp
+++ b/src/tests/core/arm/arm_test_common.cpp
@@ -4,6 +4,7 @@

 #include <algorithm>

+#include "common/page_table.h"
 #include "core/core.h"
 #include "core/hle/kernel/process.h"
 #include "core/memory.h"
@@ -13,16 +14,16 @@
 namespace ArmTests {

 TestEnvironment::TestEnvironment(bool mutable_memory_)
-    : mutable_memory(mutable_memory_), test_memory(std::make_shared<TestMemory>(this)) {
-
-    auto process = Kernel::Process::Create(kernel, "");
+    : mutable_memory(mutable_memory_),
+      test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} {
+    auto process = Kernel::Process::Create(Core::System::GetInstance(), "");
    kernel.MakeCurrentProcess(process.get());
-    page_table = &Core::CurrentProcess()->VMManager().page_table;
+    page_table = &process->VMManager().page_table;

    std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
    page_table->special_regions.clear();
    std::fill(page_table->attributes.begin(), page_table->attributes.end(),
-              Memory::PageType::Unmapped);
+              Common::PageType::Unmapped);

    Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory);
    Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory);
--- a/src/tests/core/arm/arm_test_common.h
+++ b/src/tests/core/arm/arm_test_common.h
@@ -9,10 +9,10 @@
 #include <vector>

 #include "common/common_types.h"
+#include "common/memory_hook.h"
 #include "core/hle/kernel/kernel.h"
-#include "core/memory_hook.h"

-namespace Memory {
+namespace Common {
 struct PageTable;
 }

@@ -58,7 +58,7 @@ public:

 private:
    friend struct TestMemory;
-    struct TestMemory final : Memory::MemoryHook {
+    struct TestMemory final : Common::MemoryHook {
        explicit TestMemory(TestEnvironment* env_) : env(env_) {}
        TestEnvironment* env;

@@ -86,7 +86,7 @@ private:
    bool mutable_memory;
    std::shared_ptr<TestMemory> test_memory;
    std::vector<WriteRecord> write_records;
-    Memory::PageTable* page_table = nullptr;
+    Common::PageTable* page_table = nullptr;
    Kernel::KernelCore kernel;
 };

--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -17,6 +17,12 @@ add_library(video_core STATIC
    engines/shader_header.h
    gpu.cpp
    gpu.h
+    gpu_asynch.cpp
+    gpu_asynch.h
+    gpu_synch.cpp
+    gpu_synch.h
+    gpu_thread.cpp
+    gpu_thread.h
    macro_interpreter.cpp
    macro_interpreter.h
    memory_manager.cpp
@@ -74,6 +80,7 @@ add_library(video_core STATIC
    shader/decode/hfma2.cpp
    shader/decode/conversion.cpp
    shader/decode/memory.cpp
+    shader/decode/texture.cpp
    shader/decode/float_set_predicate.cpp
    shader/decode/integer_set_predicate.cpp
    shader/decode/half_set_predicate.cpp
@@ -94,6 +101,8 @@ add_library(video_core STATIC
    surface.h
    textures/astc.cpp
    textures/astc.h
+    textures/convert.cpp
+    textures/convert.h
    textures/decoders.cpp
    textures/decoders.h
    textures/texture.h
@@ -104,12 +113,18 @@ add_library(video_core STATIC
 if (ENABLE_VULKAN)
    target_sources(video_core PRIVATE
        renderer_vulkan/declarations.h
+        renderer_vulkan/maxwell_to_vk.cpp
+        renderer_vulkan/maxwell_to_vk.h
+        renderer_vulkan/vk_buffer_cache.cpp
+        renderer_vulkan/vk_buffer_cache.h
        renderer_vulkan/vk_device.cpp
        renderer_vulkan/vk_device.h
        renderer_vulkan/vk_memory_manager.cpp
        renderer_vulkan/vk_memory_manager.h
        renderer_vulkan/vk_resource_manager.cpp
        renderer_vulkan/vk_resource_manager.h
+        renderer_vulkan/vk_sampler_cache.cpp
+        renderer_vulkan/vk_sampler_cache.h
        renderer_vulkan/vk_scheduler.cpp
        renderer_vulkan/vk_scheduler.h
        renderer_vulkan/vk_stream_buffer.cpp
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -39,7 +39,7 @@ bool DmaPusher::Step() {
    }

    const CommandList& command_list{dma_pushbuffer.front()};
-    const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
+    const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
    GPUVAddr dma_get = command_list_header.addr;
    GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
    bool non_main = command_list_header.is_non_main;
@@ -55,12 +55,9 @@ bool DmaPusher::Step() {
    }

    // Push buffer non-empty, read a word
-    const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
-    ASSERT_MSG(address, "Invalid GPU address");
-
    command_headers.resize(command_list_header.size);
-
-    Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32));
+    gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
+                                  command_list_header.size * sizeof(u32));

    for (const CommandHeader& command_header : command_headers) {

--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -2,12 +2,11 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include "core/core.h"
-#include "core/memory.h"
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/math_util.h"
 #include "video_core/engines/fermi_2d.h"
-#include "video_core/engines/maxwell_3d.h"
 #include "video_core/rasterizer_interface.h"
-#include "video_core/textures/decoders.h"

 namespace Tegra::Engines {

--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -5,7 +5,7 @@
 #pragma once

 #include <array>
-#include "common/assert.h"
+#include <cstddef>
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -2,9 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include "common/assert.h"
 #include "common/logging/log.h"
-#include "core/core.h"
-#include "core/memory.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/memory_manager.h"

--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -5,8 +5,7 @@
 #pragma once

 #include <array>
-#include "common/assert.h"
-#include "common/bit_field.h"
+#include <cstddef>
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/gpu.h"
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -9,6 +9,7 @@
 #include "video_core/engines/kepler_memory.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"

 namespace Tegra::Engines {

@@ -40,17 +41,13 @@ void KeplerMemory::ProcessData(u32 data) {
    ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
    ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);

-    const GPUVAddr address = regs.dest.Address();
-    const auto dest_address =
-        memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
-    ASSERT_MSG(dest_address, "Invalid GPU address");
-
    // We have to invalidate the destination region to evict any outdated surfaces from the cache.
-    // We do this before actually writing the new data because the destination address might contain
-    // a dirty surface that will have to be written back to memory.
-    rasterizer.InvalidateRegion(*dest_address, sizeof(u32));
+    // We do this before actually writing the new data because the destination address might
+    // contain a dirty surface that will have to be written back to memory.
+    const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
+    rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
+    memory_manager.Write32(address, data);

-    Memory::Write32(*dest_address, data);
    system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();

    state.write_offset++;
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -5,6 +5,7 @@
 #pragma once

 #include <array>
+#include <cstddef>
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -270,11 +270,9 @@ void Maxwell3D::ProcessMacroBind(u32 data) {
 }

 void Maxwell3D::ProcessQueryGet() {
-    GPUVAddr sequence_address = regs.query.QueryAddress();
+    const GPUVAddr sequence_address{regs.query.QueryAddress()};
    // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
    // VAddr before writing.
-    const auto address = memory_manager.GpuToCpuAddress(sequence_address);
-    ASSERT_MSG(address, "Invalid GPU address");

    // TODO(Subv): Support the other query units.
    ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -309,7 +307,7 @@ void Maxwell3D::ProcessQueryGet() {
            // Write the current query sequence to the sequence address.
            // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
            // query.
-            Memory::Write32(*address, sequence);
+            memory_manager.Write32(sequence_address, sequence);
        } else {
            // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
            // GPU, this command may actually take a while to complete in real hardware due to GPU
@@ -318,7 +316,7 @@ void Maxwell3D::ProcessQueryGet() {
            query_result.value = result;
            // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
            query_result.timestamp = system.CoreTiming().GetTicks();
-            Memory::WriteBlock(*address, &query_result, sizeof(query_result));
+            memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
        }
        dirty_flags.OnMemoryWrite();
        break;
@@ -393,10 +391,12 @@ void Maxwell3D::ProcessCBData(u32 value) {
    // Don't allow writing past the end of the buffer.
    ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);

-    const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
-    ASSERT_MSG(address, "Invalid GPU address");
+    const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
+
+    u8* ptr{memory_manager.GetPointer(address)};
+    rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
+    memory_manager.Write32(address, value);

-    Memory::Write32(*address, value);
    dirty_flags.OnMemoryWrite();

    // Increment the current buffer position.
@@ -404,14 +404,10 @@ void Maxwell3D::ProcessCBData(u32 value) {
 }

 Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
-    const GPUVAddr tic_base_address = regs.tic.TICAddress();
-
-    const GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
-    const auto tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
-    ASSERT_MSG(tic_address_cpu, "Invalid GPU address");
+    const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};

    Texture::TICEntry tic_entry;
-    Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
+    memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));

    ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
                   tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
@@ -429,14 +425,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
 }

 Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
-    const GPUVAddr tsc_base_address = regs.tsc.TSCAddress();
-
-    const GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
-    const auto tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
-    ASSERT_MSG(tsc_address_cpu, "Invalid GPU address");
+    const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};

    Texture::TSCEntry tsc_entry;
-    Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry));
+    memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
    return tsc_entry;
 }

@@ -455,10 +447,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
    for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
         current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {

-        const auto address = memory_manager.GpuToCpuAddress(current_texture);
-        ASSERT_MSG(address, "Invalid GPU address");
-
-        const Texture::TextureHandle tex_handle{Memory::Read32(*address)};
+        const Texture::TextureHandle tex_handle{memory_manager.Read32(current_texture)};

        Texture::FullTextureInfo tex_info{};
        // TODO(Subv): Use the shader to determine which textures are actually accessed.
@@ -493,10 +482,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,

    ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);

-    const auto tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
-    ASSERT_MSG(tex_address_cpu, "Invalid GPU address");
-
-    const Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
+    const Texture::TextureHandle tex_handle{memory_manager.Read32(tex_info_address)};

    Texture::FullTextureInfo tex_info{};
    tex_info.index = static_cast<u32>(offset);
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -3,11 +3,13 @@
 // Refer to the license.txt file included.

 #include "common/assert.h"
+#include "common/logging/log.h"
 #include "core/core.h"
 #include "core/memory.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"
 #include "video_core/textures/decoders.h"

 namespace Tegra::Engines {
@@ -41,11 +43,6 @@ void MaxwellDMA::HandleCopy() {
    const GPUVAddr source = regs.src_address.Address();
    const GPUVAddr dest = regs.dst_address.Address();

-    const auto source_cpu = memory_manager.GpuToCpuAddress(source);
-    const auto dest_cpu = memory_manager.GpuToCpuAddress(dest);
-    ASSERT_MSG(source_cpu, "Invalid source GPU address");
-    ASSERT_MSG(dest_cpu, "Invalid destination GPU address");
-
    // TODO(Subv): Perform more research and implement all features of this engine.
    ASSERT(regs.exec.enable_swizzle == 0);
    ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
@@ -68,7 +65,7 @@ void MaxwellDMA::HandleCopy() {
        // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
        // y_count).
        if (!regs.exec.enable_2d) {
-            Memory::CopyBlock(*dest_cpu, *source_cpu, regs.x_count);
+            memory_manager.CopyBlock(dest, source, regs.x_count);
            return;
        }

@@ -77,9 +74,9 @@ void MaxwellDMA::HandleCopy() {
        // rectangle. There is no need to manually flush/invalidate the regions because
        // CopyBlock does that for us.
        for (u32 line = 0; line < regs.y_count; ++line) {
-            const VAddr source_line = *source_cpu + line * regs.src_pitch;
-            const VAddr dest_line = *dest_cpu + line * regs.dst_pitch;
-            Memory::CopyBlock(dest_line, source_line, regs.x_count);
+            const GPUVAddr source_line = source + line * regs.src_pitch;
+            const GPUVAddr dest_line = dest + line * regs.dst_pitch;
+            memory_manager.CopyBlock(dest_line, source_line, regs.x_count);
        }
        return;
    }
@@ -88,15 +85,18 @@ void MaxwellDMA::HandleCopy() {

    const std::size_t copy_size = regs.x_count * regs.y_count;

+    auto source_ptr{memory_manager.GetPointer(source)};
+    auto dst_ptr{memory_manager.GetPointer(dest)};
+
    const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
        // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
        // copying.
-        rasterizer.FlushRegion(*source_cpu, src_size);
+        rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);

        // We have to invalidate the destination region to evict any outdated surfaces from the
        // cache. We do this before actually writing the new data because the destination address
        // might contain a dirty surface that will have to be written back to memory.
-        rasterizer.InvalidateRegion(*dest_cpu, dst_size);
+        rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
    };

    if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
@@ -109,8 +109,8 @@ void MaxwellDMA::HandleCopy() {
                           copy_size * src_bytes_per_pixel);

        Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
-                                  regs.src_params.size_x, src_bytes_per_pixel, *source_cpu,
-                                  *dest_cpu, regs.src_params.BlockHeight(), regs.src_params.pos_x,
+                                  regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
+                                  regs.src_params.BlockHeight(), regs.src_params.pos_x,
                                  regs.src_params.pos_y);
    } else {
        ASSERT(regs.dst_params.size_z == 1);
@@ -123,7 +123,7 @@ void MaxwellDMA::HandleCopy() {

        // If the input is linear and the output is tiled, swizzle the input and copy it over.
        Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
-                                src_bpp, *dest_cpu, *source_cpu, regs.dst_params.BlockHeight());
+                                src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
    }
 }

--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -5,6 +5,7 @@
 #pragma once

 #include <array>
+#include <cstddef>
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
--- a/Show More
+++ b/Show More