gl_rasterizer: Encapsulate sampler queries into methods

Merge pull request #2210 from lioncash/optional
kernel/hle_ipc: Convert std::shared_ptr IPC header instances to std::optional
2019-03-09 04:35:57 -03:00 · 2019-03-08 16:35:57 -05:00 · 2019-03-08 12:04:26 -05:00 · 2019-03-08 12:03:58 -05:00 · 2019-03-08 11:51:08 -05:00 · 2019-03-08 11:48:32 -05:00
126 changed files with 2603 additions and 756 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,7 +24,7 @@ matrix:
    - os: osx
      env: NAME="macos build"
      sudo: false
-      osx_image: xcode10
+      osx_image: xcode10.1
      install: "./.travis/macos/deps.sh"
      script: "./.travis/macos/build.sh"
      after_success: "./.travis/macos/upload.sh"
--- a/.travis/macos/build.sh
+++ b/.travis/macos/build.sh
@@ -2,7 +2,7 @@

 set -o pipefail

-export MACOSX_DEPLOYMENT_TARGET=10.13
+export MACOSX_DEPLOYMENT_TARGET=10.14
 export Qt5_DIR=$(brew --prefix)/opt/qt5
 export UNICORNDIR=$(pwd)/externals/unicorn
 export PATH="/usr/local/opt/ccache/libexec:$PATH"
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ yuzu is an experimental open-source emulator for the Nintendo Switch from the cr

 It is written in C++ with portability in mind, with builds actively maintained for Windows, Linux and macOS. The emulator is currently only useful for homebrew development and research purposes.

-yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success, but does not implement any of the necessary GPU features to render 3D graphics.
+yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success.

 yuzu is licensed under the GPLv2 (or any later version). Refer to the license.txt file included.

--- a/externals/cubeb
+++ b/externals/cubeb
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -46,16 +46,18 @@ struct AudioRendererParameter {
    u32_le sample_rate;
    u32_le sample_count;
    u32_le mix_buffer_count;
-    u32_le unknown_c;
+    u32_le submix_count;
    u32_le voice_count;
    u32_le sink_count;
    u32_le effect_count;
-    u32_le unknown_1c;
-    u8 unknown_20;
-    INSERT_PADDING_BYTES(3);
+    u32_le performance_frame_count;
+    u8 is_voice_drop_enabled;
+    u8 unknown_21;
+    u8 unknown_22;
+    u8 execution_mode;
    u32_le splitter_count;
-    u32_le unknown_2c;
-    INSERT_PADDING_WORDS(1);
+    u32_le num_splitter_send_channels;
+    u32_le unknown_30;
    u32_le revision;
 };
 static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size");
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -12,6 +12,10 @@
 #include "common/ring_buffer.h"
 #include "core/settings.h"

+#ifdef _MSC_VER
+#include <objbase.h>
+#endif
+
 namespace AudioCore {

 class CubebSinkStream final : public SinkStream {
@@ -108,6 +112,11 @@ private:
 };

 CubebSink::CubebSink(std::string_view target_device_name) {
+    // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
+#ifdef _MSC_VER
+    com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
+#endif
+
    if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) {
        LOG_CRITICAL(Audio_Sink, "cubeb_init failed");
        return;
@@ -142,6 +151,12 @@ CubebSink::~CubebSink() {
    }

    cubeb_destroy(ctx);
+
+#ifdef _MSC_VER
+    if (SUCCEEDED(com_init_result)) {
+        CoUninitialize();
+    }
+#endif
 }

 SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,
--- a/src/audio_core/cubeb_sink.h
+++ b/src/audio_core/cubeb_sink.h
@@ -25,6 +25,10 @@ private:
    cubeb* ctx{};
    cubeb_devid output_device{};
    std::vector<SinkStreamPtr> sink_streams;
+
+#ifdef _MSC_VER
+    u32 com_init_result = 0;
+#endif
 };

 std::vector<std::string> ListCubebSinkDevices();
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -111,12 +111,6 @@
 template <std::size_t Position, std::size_t Bits, typename T>
 struct BitField {
 private:
-    // We hide the copy assigment operator here, because the default copy
-    // assignment would copy the full storage value, rather than just the bits
-    // relevant to this particular bit field.
-    // We don't delete it because we want BitField to be trivially copyable.
-    constexpr BitField& operator=(const BitField&) = default;
-
    // UnderlyingType is T for non-enum types and the underlying type of T if
    // T is an enumeration. Note that T is wrapped within an enable_if in the
    // former case to workaround compile errors which arise when using
@@ -163,9 +157,13 @@ public:
    BitField(T val) = delete;
    BitField& operator=(T val) = delete;

-    // Force default constructor to be created
-    // so that we can use this within unions
-    constexpr BitField() = default;
+    constexpr BitField() noexcept = default;
+
+    constexpr BitField(const BitField&) noexcept = default;
+    constexpr BitField& operator=(const BitField&) noexcept = default;
+
+    constexpr BitField(BitField&&) noexcept = default;
+    constexpr BitField& operator=(BitField&&) noexcept = default;

    constexpr FORCE_INLINE operator T() const {
        return Value();
--- a/src/common/color.h
+++ b/src/common/color.h
@@ -55,36 +55,36 @@ constexpr u8 Convert8To6(u8 value) {
 /**
 * Decode a color stored in RGBA8 format
 * @param bytes Pointer to encoded source color
- * @return Result color decoded as Math::Vec4<u8>
+ * @return Result color decoded as Common::Vec4<u8>
 */
-inline Math::Vec4<u8> DecodeRGBA8(const u8* bytes) {
+inline Common::Vec4<u8> DecodeRGBA8(const u8* bytes) {
    return {bytes[3], bytes[2], bytes[1], bytes[0]};
 }

 /**
 * Decode a color stored in RGB8 format
 * @param bytes Pointer to encoded source color
- * @return Result color decoded as Math::Vec4<u8>
+ * @return Result color decoded as Common::Vec4<u8>
 */
-inline Math::Vec4<u8> DecodeRGB8(const u8* bytes) {
+inline Common::Vec4<u8> DecodeRGB8(const u8* bytes) {
    return {bytes[2], bytes[1], bytes[0], 255};
 }

 /**
 * Decode a color stored in RG8 (aka HILO8) format
 * @param bytes Pointer to encoded source color
- * @return Result color decoded as Math::Vec4<u8>
+ * @return Result color decoded as Common::Vec4<u8>
 */
-inline Math::Vec4<u8> DecodeRG8(const u8* bytes) {
+inline Common::Vec4<u8> DecodeRG8(const u8* bytes) {
    return {bytes[1], bytes[0], 0, 255};
 }

 /**
 * Decode a color stored in RGB565 format
 * @param bytes Pointer to encoded source color
- * @return Result color decoded as Math::Vec4<u8>
+ * @return Result color decoded as Common::Vec4<u8>
 */
-inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
+inline Common::Vec4<u8> DecodeRGB565(const u8* bytes) {
    u16_le pixel;
    std::memcpy(&pixel, bytes, sizeof(pixel));
    return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F),
@@ -94,9 +94,9 @@ inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
 /**
 * Decode a color stored in RGB5A1 format
 * @param bytes Pointer to encoded source color
- * @return Result color decoded as Math::Vec4<u8>
+ * @return Result color decoded as Common::Vec4<u8>
 */
-inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
+inline Common::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
    u16_le pixel;
    std::memcpy(&pixel, bytes, sizeof(pixel));
    return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F),
@@ -106,9 +106,9 @@ inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
 /**
 * Decode a color stored in RGBA4 format
 * @param bytes Pointer to encoded source color
- * @return Result color decoded as Math::Vec4<u8>
+ * @return Result color decoded as Common::Vec4<u8>
 */
-inline Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
+inline Common::Vec4<u8> DecodeRGBA4(const u8* bytes) {
    u16_le pixel;
    std::memcpy(&pixel, bytes, sizeof(pixel));
    return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF),
@@ -138,9 +138,9 @@ inline u32 DecodeD24(const u8* bytes) {
 /**
 * Decode a depth value and a stencil value stored in D24S8 format
 * @param bytes Pointer to encoded source values
- * @return Resulting values stored as a Math::Vec2
+ * @return Resulting values stored as a Common::Vec2
 */
-inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
+inline Common::Vec2<u32> DecodeD24S8(const u8* bytes) {
    return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]};
 }

@@ -149,7 +149,7 @@ inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGBA8(const Common::Vec4<u8>& color, u8* bytes) {
    bytes[3] = color.r();
    bytes[2] = color.g();
    bytes[1] = color.b();
@@ -161,7 +161,7 @@ inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGB8(const Common::Vec4<u8>& color, u8* bytes) {
    bytes[2] = color.r();
    bytes[1] = color.g();
    bytes[0] = color.b();
@@ -172,7 +172,7 @@ inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) {
    bytes[1] = color.r();
    bytes[0] = color.g();
 }
@@ -181,7 +181,7 @@ inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGB565(const Common::Vec4<u8>& color, u8* bytes) {
    const u16_le data =
        (Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b());

@@ -193,7 +193,7 @@ inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGB5A1(const Common::Vec4<u8>& color, u8* bytes) {
    const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) |
                        (Convert8To5(color.b()) << 1) | Convert8To1(color.a());

@@ -205,7 +205,7 @@ inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGBA4(const Common::Vec4<u8>& color, u8* bytes) {
    const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) |
                     (Convert8To4(color.b()) << 4) | Convert8To4(color.a());

--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -39,8 +39,10 @@ public:
    Impl(Impl const&) = delete;
    const Impl& operator=(Impl const&) = delete;

-    void PushEntry(Entry e) {
-        message_queue.Push(std::move(e));
+    void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num,
+                   const char* function, std::string message) {
+        message_queue.Push(
+            CreateEntry(log_class, log_level, filename, line_num, function, std::move(message)));
    }

    void AddBackend(std::unique_ptr<Backend> backend) {
@@ -108,11 +110,30 @@ private:
        backend_thread.join();
    }

+    Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
+                      const char* function, std::string message) const {
+        using std::chrono::duration_cast;
+        using std::chrono::steady_clock;
+
+        Entry entry;
+        entry.timestamp =
+            duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
+        entry.log_class = log_class;
+        entry.log_level = log_level;
+        entry.filename = Common::TrimSourcePath(filename);
+        entry.line_num = line_nr;
+        entry.function = function;
+        entry.message = std::move(message);
+
+        return entry;
+    }
+
    std::mutex writing_mutex;
    std::thread backend_thread;
    std::vector<std::unique_ptr<Backend>> backends;
    Common::MPSCQueue<Log::Entry> message_queue;
    Filter filter;
+    std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
 };

 void ConsoleBackend::Write(const Entry& entry) {
@@ -271,25 +292,6 @@ const char* GetLevelName(Level log_level) {
 #undef LVL
 }

-Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
-                  const char* function, std::string message) {
-    using std::chrono::duration_cast;
-    using std::chrono::steady_clock;
-
-    static steady_clock::time_point time_origin = steady_clock::now();
-
-    Entry entry;
-    entry.timestamp = duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
-    entry.log_class = log_class;
-    entry.log_level = log_level;
-    entry.filename = Common::TrimSourcePath(filename);
-    entry.line_num = line_nr;
-    entry.function = function;
-    entry.message = std::move(message);
-
-    return entry;
-}
-
 void SetGlobalFilter(const Filter& filter) {
    Impl::Instance().SetGlobalFilter(filter);
 }
@@ -314,9 +316,7 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
    if (!filter.CheckMessage(log_class, log_level))
        return;

-    Entry entry =
-        CreateEntry(log_class, log_level, filename, line_num, function, fmt::vformat(format, args));
-
-    instance.PushEntry(std::move(entry));
+    instance.PushEntry(log_class, log_level, filename, line_num, function,
+                       fmt::vformat(format, args));
 }
 } // namespace Log
--- a/src/common/logging/backend.h
+++ b/src/common/logging/backend.h
@@ -135,10 +135,6 @@ const char* GetLogClassName(Class log_class);
 */
 const char* GetLevelName(Level log_level);

-/// Creates a log entry by formatting the given source location, and message.
-Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
-                  const char* function, std::string message);
-
 /**
 * The global filter will prevent any messages from even being processed if they are filtered. Each
 * backend can have a filter, but if the level is lower than the global filter, the backend will
--- a/src/common/math_util.h
+++ b/src/common/math_util.h
@@ -7,7 +7,7 @@
 #include <cstdlib>
 #include <type_traits>

-namespace MathUtil {
+namespace Common {

 constexpr float PI = 3.14159265f;

@@ -41,4 +41,4 @@ struct Rectangle {
    }
 };

-} // namespace MathUtil
+} // namespace Common
--- a/src/common/quaternion.h
+++ b/src/common/quaternion.h
@@ -6,12 +6,12 @@

 #include "common/vector_math.h"

-namespace Math {
+namespace Common {

 template <typename T>
 class Quaternion {
 public:
-    Math::Vec3<T> xyz;
+    Vec3<T> xyz;
    T w{};

    Quaternion<decltype(-T{})> Inverse() const {
@@ -38,12 +38,12 @@ public:
 };

 template <typename T>
-auto QuaternionRotate(const Quaternion<T>& q, const Math::Vec3<T>& v) {
+auto QuaternionRotate(const Quaternion<T>& q, const Vec3<T>& v) {
    return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w);
 }

-inline Quaternion<float> MakeQuaternion(const Math::Vec3<float>& axis, float angle) {
+inline Quaternion<float> MakeQuaternion(const Vec3<float>& axis, float angle) {
    return {axis * std::sin(angle / 2), std::cos(angle / 2)};
 }

-} // namespace Math
+} // namespace Common
--- a/src/common/vector_math.h
+++ b/src/common/vector_math.h
@@ -33,7 +33,7 @@
 #include <cmath>
 #include <type_traits>

-namespace Math {
+namespace Common {

 template <typename T>
 class Vec2;
@@ -690,4 +690,4 @@ constexpr Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw) {
    return MakeVec(x, yzw[0], yzw[1], yzw[2]);
 }

-} // namespace Math
+} // namespace Common
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -217,6 +217,7 @@ add_library(core STATIC
    hle/service/audio/audren_u.h
    hle/service/audio/codecctl.cpp
    hle/service/audio/codecctl.h
+    hle/service/audio/errors.h
    hle/service/audio/hwopus.cpp
    hle/service/audio/hwopus.h
    hle/service/bcat/bcat.cpp
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -36,7 +36,8 @@
 #include "frontend/applets/software_keyboard.h"
 #include "frontend/applets/web_browser.h"
 #include "video_core/debug_utils/debug_utils.h"
-#include "video_core/gpu.h"
+#include "video_core/gpu_asynch.h"
+#include "video_core/gpu_synch.h"
 #include "video_core/renderer_base.h"
 #include "video_core/video_core.h"

@@ -78,6 +79,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
    return vfs->OpenFile(path, FileSys::Mode::Read);
 }
 struct System::Impl {
+    explicit Impl(System& system) : kernel{system} {}

    Cpu& CurrentCpuCore() {
        return cpu_core_manager.GetCurrentCore();
@@ -95,7 +97,7 @@ struct System::Impl {
        LOG_DEBUG(HW_Memory, "initialized OK");

        core_timing.Initialize();
-        kernel.Initialize(core_timing);
+        kernel.Initialize();

        const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
            std::chrono::system_clock::now().time_since_epoch());
@@ -128,10 +130,16 @@ struct System::Impl {
            return ResultStatus::ErrorVideoCore;
        }

-        gpu_core = std::make_unique<Tegra::GPU>(system, renderer->Rasterizer());
+        is_powered_on = true;
+
+        if (Settings::values.use_asynchronous_gpu_emulation) {
+            gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer);
+        } else {
+            gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer);
+        }

        cpu_core_manager.Initialize(system);
-        is_powered_on = true;
+
        LOG_DEBUG(Core, "Initialized OK");

        // Reset counters and set time origin to current frame
@@ -182,13 +190,13 @@ struct System::Impl {

    void Shutdown() {
        // Log last frame performance stats
-        auto perf_results = GetAndResetPerfStats();
-        Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
-                             perf_results.emulation_speed * 100.0);
-        Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
-                             perf_results.game_fps);
-        Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
-                             perf_results.frametime * 1000.0);
+        const auto perf_results = GetAndResetPerfStats();
+        telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
+                                    perf_results.emulation_speed * 100.0);
+        telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
+                                    perf_results.game_fps);
+        telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
+                                    perf_results.frametime * 1000.0);

        is_powered_on = false;

@@ -265,7 +273,7 @@ struct System::Impl {
    Core::FrameLimiter frame_limiter;
 };

-System::System() : impl{std::make_unique<Impl>()} {}
+System::System() : impl{std::make_unique<Impl>(*this)} {}
 System::~System() = default;

 Cpu& System::CurrentCpuCore() {
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -293,10 +293,6 @@ inline ARM_Interface& CurrentArmInterface() {
    return System::GetInstance().CurrentArmInterface();
 }

-inline TelemetrySession& Telemetry() {
-    return System::GetInstance().TelemetrySession();
-}
-
 inline Kernel::Process* CurrentProcess() {
    return System::GetInstance().CurrentProcess();
 }
--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -67,7 +67,7 @@ static bool IsWithinTouchscreen(const Layout::FramebufferLayout& layout, unsigne
            framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right);
 }

-std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) {
+std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) const {
    new_x = std::max(new_x, framebuffer_layout.screen.left);
    new_x = std::min(new_x, framebuffer_layout.screen.right - 1);

--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -166,7 +166,7 @@ private:
    /**
     * Clip the provided coordinates to be inside the touchscreen area.
     */
-    std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y);
+    std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y) const;
 };

 } // namespace Core::Frontend
--- a/src/core/frontend/framebuffer_layout.cpp
+++ b/src/core/frontend/framebuffer_layout.cpp
@@ -12,12 +12,12 @@ namespace Layout {

 // Finds the largest size subrectangle contained in window area that is confined to the aspect ratio
 template <class T>
-static MathUtil::Rectangle<T> maxRectangle(MathUtil::Rectangle<T> window_area,
-                                           float screen_aspect_ratio) {
+static Common::Rectangle<T> MaxRectangle(Common::Rectangle<T> window_area,
+                                         float screen_aspect_ratio) {
    float scale = std::min(static_cast<float>(window_area.GetWidth()),
                           window_area.GetHeight() / screen_aspect_ratio);
-    return MathUtil::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
-                                  static_cast<T>(std::round(scale * screen_aspect_ratio))};
+    return Common::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
+                                static_cast<T>(std::round(scale * screen_aspect_ratio))};
 }

 FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
@@ -29,8 +29,8 @@ FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {

    const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) /
                                       ScreenUndocked::Width};
-    MathUtil::Rectangle<unsigned> screen_window_area{0, 0, width, height};
-    MathUtil::Rectangle<unsigned> screen = maxRectangle(screen_window_area, emulation_aspect_ratio);
+    Common::Rectangle<unsigned> screen_window_area{0, 0, width, height};
+    Common::Rectangle<unsigned> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio);

    float window_aspect_ratio = static_cast<float>(height) / width;

--- a/src/core/frontend/framebuffer_layout.h
+++ b/src/core/frontend/framebuffer_layout.h
@@ -16,7 +16,7 @@ struct FramebufferLayout {
    unsigned width{ScreenUndocked::Width};
    unsigned height{ScreenUndocked::Height};

-    MathUtil::Rectangle<unsigned> screen;
+    Common::Rectangle<unsigned> screen;

    /**
     * Returns the ration of pixel size of the screen, compared to the native size of the undocked
--- a/src/core/frontend/input.h
+++ b/src/core/frontend/input.h
@@ -124,7 +124,7 @@ using AnalogDevice = InputDevice<std::tuple<float, float>>;
 *   Orientation is determined by right-hand rule.
 *   Units: deg/sec
 */
-using MotionDevice = InputDevice<std::tuple<Math::Vec3<float>, Math::Vec3<float>>>;
+using MotionDevice = InputDevice<std::tuple<Common::Vec3<float>, Common::Vec3<float>>>;

 /**
 * A touch device is an input device that returns a tuple of two floats and a bool. The floats are
--- a/src/core/hle/ipc.h
+++ b/src/core/hle/ipc.h
@@ -4,10 +4,10 @@

 #pragma once

+#include "common/bit_field.h"
+#include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/swap.h"
-#include "core/hle/kernel/errors.h"
-#include "core/memory.h"

 namespace IPC {

--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -350,7 +350,7 @@ public:
    template <class T>
    std::shared_ptr<T> PopIpcInterface() {
        ASSERT(context->Session()->IsDomain());
-        ASSERT(context->GetDomainMessageHeader()->input_object_count > 0);
+        ASSERT(context->GetDomainMessageHeader().input_object_count > 0);
        return context->GetDomainRequestHandler<T>(Pop<u32>() - 1);
    }
 };
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -9,6 +9,7 @@
 #include "common/common_types.h"
 #include "core/core.h"
 #include "core/core_cpu.h"
+#include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/process.h"
@@ -17,32 +18,144 @@
 #include "core/hle/result.h"
 #include "core/memory.h"

-namespace Kernel::AddressArbiter {
+namespace Kernel {
+namespace {
+// Wakes up to num_to_wake threads from waiting_threads (all of them if num_to_wake <= 0).
+void WakeThreads(const std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
+    // Only process up to 'num_to_wake' threads, unless it is <= 0, in which case process them
+    // all. The count is clamped to the vector size so the loop never indexes out of bounds.
+    std::size_t last = waiting_threads.size();
+    if (num_to_wake > 0 && static_cast<std::size_t>(num_to_wake) < last) {
+        last = static_cast<std::size_t>(num_to_wake);
+    }

-// Performs actual address waiting logic.
-static ResultCode WaitForAddress(VAddr address, s64 timeout) {
-    SharedPtr<Thread> current_thread = GetCurrentThread();
+    // Signal the waiting threads.
+    for (std::size_t i = 0; i < last; i++) {
+        ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb);
+        waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
+        waiting_threads[i]->SetArbiterWaitAddress(0);
+        waiting_threads[i]->ResumeFromWait();
+    }
+}
+} // Anonymous namespace
+
+AddressArbiter::AddressArbiter(Core::System& system) : system{system} {}
+AddressArbiter::~AddressArbiter() = default;
+
+ResultCode AddressArbiter::SignalToAddress(VAddr address, s32 num_to_wake) {
+    const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
+    WakeThreads(waiting_threads, num_to_wake);
+    return RESULT_SUCCESS;
+}
+
+ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value,
+                                                              s32 num_to_wake) {
+    // Ensure that we can write to the address.
+    if (!Memory::IsValidVirtualAddress(address)) {
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    if (static_cast<s32>(Memory::Read32(address)) != value) {
+        return ERR_INVALID_STATE;
+    }
+
+    Memory::Write32(address, static_cast<u32>(value + 1));
+    return SignalToAddress(address, num_to_wake);
+}
+
+ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
+                                                                         s32 num_to_wake) {
+    // Ensure that we can write to the address.
+    if (!Memory::IsValidVirtualAddress(address)) {
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    // Get threads waiting on the address.
+    const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
+
+    // Determine the modified value depending on the waiting count.
+    s32 updated_value;
+    if (waiting_threads.empty()) {
+        updated_value = value - 1;
+    } else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
+        updated_value = value + 1;
+    } else {
+        updated_value = value;
+    }
+
+    if (static_cast<s32>(Memory::Read32(address)) != value) {
+        return ERR_INVALID_STATE;
+    }
+
+    Memory::Write32(address, static_cast<u32>(updated_value));
+    WakeThreads(waiting_threads, num_to_wake);
+    return RESULT_SUCCESS;
+}
+
+ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
+                                                    bool should_decrement) {
+    // Ensure that we can read the address.
+    if (!Memory::IsValidVirtualAddress(address)) {
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    const s32 cur_value = static_cast<s32>(Memory::Read32(address));
+    if (cur_value >= value) {
+        return ERR_INVALID_STATE;
+    }
+
+    if (should_decrement) {
+        Memory::Write32(address, static_cast<u32>(cur_value - 1));
+    }
+
+    // Short-circuit without rescheduling, if timeout is zero.
+    if (timeout == 0) {
+        return RESULT_TIMEOUT;
+    }
+
+    return WaitForAddress(address, timeout);
+}
+
+ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
+    // Ensure that we can read the address.
+    if (!Memory::IsValidVirtualAddress(address)) {
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+    // Only wait for the address if equal.
+    if (static_cast<s32>(Memory::Read32(address)) != value) {
+        return ERR_INVALID_STATE;
+    }
+    // Short-circuit without rescheduling, if timeout is zero.
+    if (timeout == 0) {
+        return RESULT_TIMEOUT;
+    }
+
+    return WaitForAddress(address, timeout);
+}
+
+ResultCode AddressArbiter::WaitForAddress(VAddr address, s64 timeout) {
+    SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread();
    current_thread->SetArbiterWaitAddress(address);
    current_thread->SetStatus(ThreadStatus::WaitArb);
    current_thread->InvalidateWakeupCallback();

    current_thread->WakeAfterDelay(timeout);

-    Core::System::GetInstance().CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
+    system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
    return RESULT_TIMEOUT;
 }

-// Gets the threads waiting on an address.
-static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) {
-    const auto RetrieveWaitingThreads = [](std::size_t core_index,
-                                           std::vector<SharedPtr<Thread>>& waiting_threads,
-                                           VAddr arb_addr) {
-        const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
+std::vector<SharedPtr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) const {
+    const auto RetrieveWaitingThreads = [this](std::size_t core_index,
+                                               std::vector<SharedPtr<Thread>>& waiting_threads,
+                                               VAddr arb_addr) {
+        const auto& scheduler = system.Scheduler(core_index);
        const auto& thread_list = scheduler.GetThreadList();

        for (const auto& thread : thread_list) {
-            if (thread->GetArbiterWaitAddress() == arb_addr)
+            if (thread->GetArbiterWaitAddress() == arb_addr) {
                waiting_threads.push_back(thread);
+            }
        }
    };

@@ -61,118 +174,4 @@ static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address)

    return threads;
 }
-
-// Wake up num_to_wake (or all) threads in a vector.
-static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
-    // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
-    // them all.
-    std::size_t last = waiting_threads.size();
-    if (num_to_wake > 0)
-        last = num_to_wake;
-
-    // Signal the waiting threads.
-    for (std::size_t i = 0; i < last; i++) {
-        ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb);
-        waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
-        waiting_threads[i]->SetArbiterWaitAddress(0);
-        waiting_threads[i]->ResumeFromWait();
-    }
-}
-
-// Signals an address being waited on.
-ResultCode SignalToAddress(VAddr address, s32 num_to_wake) {
-    std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
-
-    WakeThreads(waiting_threads, num_to_wake);
-    return RESULT_SUCCESS;
-}
-
-// Signals an address being waited on and increments its value if equal to the value argument.
-ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake) {
-    // Ensure that we can write to the address.
-    if (!Memory::IsValidVirtualAddress(address)) {
-        return ERR_INVALID_ADDRESS_STATE;
-    }
-
-    if (static_cast<s32>(Memory::Read32(address)) == value) {
-        Memory::Write32(address, static_cast<u32>(value + 1));
-    } else {
-        return ERR_INVALID_STATE;
-    }
-
-    return SignalToAddress(address, num_to_wake);
-}
-
-// Signals an address being waited on and modifies its value based on waiting thread count if equal
-// to the value argument.
-ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
-                                                         s32 num_to_wake) {
-    // Ensure that we can write to the address.
-    if (!Memory::IsValidVirtualAddress(address)) {
-        return ERR_INVALID_ADDRESS_STATE;
-    }
-
-    // Get threads waiting on the address.
-    std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
-
-    // Determine the modified value depending on the waiting count.
-    s32 updated_value;
-    if (waiting_threads.empty()) {
-        updated_value = value - 1;
-    } else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
-        updated_value = value + 1;
-    } else {
-        updated_value = value;
-    }
-
-    if (static_cast<s32>(Memory::Read32(address)) == value) {
-        Memory::Write32(address, static_cast<u32>(updated_value));
-    } else {
-        return ERR_INVALID_STATE;
-    }
-
-    WakeThreads(waiting_threads, num_to_wake);
-    return RESULT_SUCCESS;
-}
-
-// Waits on an address if the value passed is less than the argument value, optionally decrementing.
-ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement) {
-    // Ensure that we can read the address.
-    if (!Memory::IsValidVirtualAddress(address)) {
-        return ERR_INVALID_ADDRESS_STATE;
-    }
-
-    s32 cur_value = static_cast<s32>(Memory::Read32(address));
-    if (cur_value < value) {
-        if (should_decrement) {
-            Memory::Write32(address, static_cast<u32>(cur_value - 1));
-        }
-    } else {
-        return ERR_INVALID_STATE;
-    }
-    // Short-circuit without rescheduling, if timeout is zero.
-    if (timeout == 0) {
-        return RESULT_TIMEOUT;
-    }
-
-    return WaitForAddress(address, timeout);
-}
-
-// Waits on an address if the value passed is equal to the argument value.
-ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
-    // Ensure that we can read the address.
-    if (!Memory::IsValidVirtualAddress(address)) {
-        return ERR_INVALID_ADDRESS_STATE;
-    }
-    // Only wait for the address if equal.
-    if (static_cast<s32>(Memory::Read32(address)) != value) {
-        return ERR_INVALID_STATE;
-    }
-    // Short-circuit without rescheduling, if timeout is zero.
-    if (timeout == 0) {
-        return RESULT_TIMEOUT;
-    }
-
-    return WaitForAddress(address, timeout);
-}
-} // namespace Kernel::AddressArbiter
+} // namespace Kernel
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -5,28 +5,70 @@
 #pragma once

+#include <vector>
+
 #include "common/common_types.h"
+#include "core/hle/kernel/object.h"

 union ResultCode;

-namespace Kernel::AddressArbiter {
+namespace Core {
+class System;
+}

-enum class ArbitrationType {
-    WaitIfLessThan = 0,
-    DecrementAndWaitIfLessThan = 1,
-    WaitIfEqual = 2,
+namespace Kernel {
+
+class Thread;
+
+class AddressArbiter {
+public:
+    enum class ArbitrationType {
+        WaitIfLessThan = 0,
+        DecrementAndWaitIfLessThan = 1,
+        WaitIfEqual = 2,
+    };
+
+    enum class SignalType {
+        Signal = 0,
+        IncrementAndSignalIfEqual = 1,
+        ModifyByWaitingCountAndSignalIfEqual = 2,
+    };
+
+    explicit AddressArbiter(Core::System& system);
+    ~AddressArbiter();
+
+    AddressArbiter(const AddressArbiter&) = delete;
+    AddressArbiter& operator=(const AddressArbiter&) = delete;
+
+    AddressArbiter(AddressArbiter&&) = default;
+    AddressArbiter& operator=(AddressArbiter&&) = delete;
+
+    /// Signals an address being waited on.
+    ResultCode SignalToAddress(VAddr address, s32 num_to_wake);
+
+    /// Signals an address being waited on and increments its value if equal to the value argument.
+    ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
+
+    /// Signals an address being waited on and modifies its value based on waiting thread count if
+    /// equal to the value argument.
+    ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
+                                                             s32 num_to_wake);
+
+    /// Waits on an address if the value stored at it is less than the value argument,
+    /// optionally decrementing the stored value.
+    ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
+                                        bool should_decrement);
+
+    /// Waits on an address if the value stored at it is equal to the value argument.
+    ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
+
+private:
+    // Suspends the current thread on the given address with a timeout in nanoseconds
+    ResultCode WaitForAddress(VAddr address, s64 timeout);
+
+    // Gets the threads whose arbiter wait address matches the given address.
+    std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
+
+    Core::System& system;
 };

-enum class SignalType {
-    Signal = 0,
-    IncrementAndSignalIfEqual = 1,
-    ModifyByWaitingCountAndSignalIfEqual = 2,
-};
-
-ResultCode SignalToAddress(VAddr address, s32 num_to_wake);
-ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
-ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
-
-ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement);
-ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
-
-} // namespace Kernel::AddressArbiter
+} // namespace Kernel
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -17,21 +17,11 @@ ClientSession::~ClientSession() {
    // This destructor will be called automatically when the last ClientSession handle is closed by
    // the emulated application.

-    // Local references to ServerSession and SessionRequestHandler are necessary to guarantee they
+    // A local reference to the ServerSession is necessary to guarantee it
    // will be kept alive until after ClientDisconnected() returns.
    SharedPtr<ServerSession> server = parent->server;
    if (server) {
-        std::shared_ptr<SessionRequestHandler> hle_handler = server->hle_handler;
-        if (hle_handler)
-            hle_handler->ClientDisconnected(server);
-
-        // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
-        // their WaitSynchronization result to 0xC920181A.
-
-        // Clean up the list of client threads with pending requests, they are unneeded now that the
-        // client endpoint is closed.
-        server->pending_requesting_threads.clear();
-        server->currently_handling = nullptr;
+        server->ClientDisconnected();
    }

    parent->client = nullptr;
--- a/src/core/hle/kernel/client_session.h
+++ b/src/core/hle/kernel/client_session.h
@@ -36,14 +36,15 @@ public:

    ResultCode SendSyncRequest(SharedPtr<Thread> thread);

-    std::string name; ///< Name of client port (optional)
+private:
+    explicit ClientSession(KernelCore& kernel);
+    ~ClientSession() override;

    /// The parent session, which links to the server endpoint.
    std::shared_ptr<Session> parent;

-private:
-    explicit ClientSession(KernelCore& kernel);
-    ~ClientSession() override;
+    /// Name of the client session (optional)
+    std::string name;
 };

 } // namespace Kernel
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -14,6 +14,7 @@ constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
 constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
 constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
 constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
+constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
 constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
 constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
 constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -14,32 +14,47 @@
 namespace Kernel {
 namespace {
 constexpr u16 GetSlot(Handle handle) {
-    return handle >> 15;
+    return static_cast<u16>(handle >> 15);
 }

 constexpr u16 GetGeneration(Handle handle) {
-    return handle & 0x7FFF;
+    return static_cast<u16>(handle & 0x7FFF);
 }
 } // Anonymous namespace

 HandleTable::HandleTable() {
-    next_generation = 1;
    Clear();
 }

 HandleTable::~HandleTable() = default;

+ResultCode HandleTable::SetSize(s32 handle_table_size) {
+    if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
+        return ERR_OUT_OF_MEMORY;
+    }
+
+    // Zero requests the maximum allowable size for the handle table in the
+    // actual kernel; table_size already defaults to that, so it is left as-is.
+    // Negative values never get here: the unsigned comparison above turns them
+    // into huge counts and rejects them with ERR_OUT_OF_MEMORY.
+    if (handle_table_size > 0) {
+        table_size = static_cast<u16>(handle_table_size);
+    }
+
+    return RESULT_SUCCESS;
+}
+
 ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) {
    DEBUG_ASSERT(obj != nullptr);

-    u16 slot = next_free_slot;
-    if (slot >= generations.size()) {
+    const u16 slot = next_free_slot;
+    if (slot >= table_size) {
        LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use.");
        return ERR_HANDLE_TABLE_FULL;
    }
    next_free_slot = generations[slot];

-    u16 generation = next_generation++;
+    const u16 generation = next_generation++;

    // Overflow count so it fits in the 15 bits dedicated to the generation in the handle.
    // Horizon OS uses zero to represent an invalid handle, so skip to 1.
@@ -64,10 +79,11 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
 }

 ResultCode HandleTable::Close(Handle handle) {
-    if (!IsValid(handle))
+    if (!IsValid(handle)) {
        return ERR_INVALID_HANDLE;
+    }

-    u16 slot = GetSlot(handle);
+    const u16 slot = GetSlot(handle);

    objects[slot] = nullptr;

@@ -77,10 +93,10 @@ ResultCode HandleTable::Close(Handle handle) {
 }

 bool HandleTable::IsValid(Handle handle) const {
-    std::size_t slot = GetSlot(handle);
-    u16 generation = GetGeneration(handle);
+    const std::size_t slot = GetSlot(handle);
+    const u16 generation = GetGeneration(handle);

-    return slot < MAX_COUNT && objects[slot] != nullptr && generations[slot] == generation;
+    return slot < table_size && objects[slot] != nullptr && generations[slot] == generation;
 }

 SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
@@ -97,7 +113,7 @@ SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
 }

 void HandleTable::Clear() {
-    for (u16 i = 0; i < MAX_COUNT; ++i) {
+    for (u16 i = 0; i < table_size; ++i) {
        generations[i] = i + 1;
        objects[i] = nullptr;
    }
--- a/src/core/hle/kernel/handle_table.h
+++ b/src/core/hle/kernel/handle_table.h
@@ -49,6 +49,20 @@ public:
    HandleTable();
    ~HandleTable();

+    /**
+     * Sets the number of handles that may be in use at one time
+     * for this handle table.
+     *
+     * @param handle_table_size The desired size to limit the handle table to.
+     *
+     * @returns an error code indicating if initialization was successful.
+     *          If initialization was not successful, then ERR_OUT_OF_MEMORY
+     *          will be returned.
+     *
+     * @pre handle_table_size must be within the range [0, 1024]
+     */
+    ResultCode SetSize(s32 handle_table_size);
+
    /**
     * Allocates a handle for the given object.
     * @return The created Handle or one of the following errors:
@@ -103,14 +117,21 @@ private:
     */
    std::array<u16, MAX_COUNT> generations;

+    /**
+     * The limited size of the handle table. This can be specified by process
+     * capabilities in order to restrict the overall number of handles that
+     * can be created in a process instance
+     */
+    u16 table_size = static_cast<u16>(MAX_COUNT);
+
    /**
     * Global counter of the number of created handles. Stored in `generations` when a handle is
     * created, and wraps around to 1 when it hits 0x8000.
     */
-    u16 next_generation;
+    u16 next_generation = 1;

    /// Head of the free slots linked list.
-    u16 next_free_slot;
+    u16 next_free_slot = 0;
 };

 } // namespace Kernel
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -86,7 +86,7 @@ HLERequestContext::~HLERequestContext() = default;
 void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf,
                                           bool incoming) {
    IPC::RequestParser rp(src_cmdbuf);
-    command_header = std::make_shared<IPC::CommandHeader>(rp.PopRaw<IPC::CommandHeader>());
+    command_header = rp.PopRaw<IPC::CommandHeader>();

    if (command_header->type == IPC::CommandType::Close) {
        // Close does not populate the rest of the IPC header
@@ -95,8 +95,7 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_

    // If handle descriptor is present, add size of it
    if (command_header->enable_handle_descriptor) {
-        handle_descriptor_header =
-            std::make_shared<IPC::HandleDescriptorHeader>(rp.PopRaw<IPC::HandleDescriptorHeader>());
+        handle_descriptor_header = rp.PopRaw<IPC::HandleDescriptorHeader>();
        if (handle_descriptor_header->send_current_pid) {
            rp.Skip(2, false);
        }
@@ -140,16 +139,15 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
        // If this is an incoming message, only CommandType "Request" has a domain header
        // All outgoing domain messages have the domain header, if only incoming has it
        if (incoming || domain_message_header) {
-            domain_message_header =
-                std::make_shared<IPC::DomainMessageHeader>(rp.PopRaw<IPC::DomainMessageHeader>());
+            domain_message_header = rp.PopRaw<IPC::DomainMessageHeader>();
        } else {
-            if (Session()->IsDomain())
+            if (Session()->IsDomain()) {
                LOG_WARNING(IPC, "Domain request has no DomainMessageHeader!");
+            }
        }
    }

-    data_payload_header =
-        std::make_shared<IPC::DataPayloadHeader>(rp.PopRaw<IPC::DataPayloadHeader>());
+    data_payload_header = rp.PopRaw<IPC::DataPayloadHeader>();

    data_payload_offset = rp.GetCurrentOffset();

@@ -264,11 +262,11 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
        // Write the domain objects to the command buffer, these go after the raw untranslated data.
        // TODO(Subv): This completely ignores C buffers.
        std::size_t domain_offset = size - domain_message_header->num_objects;
-        auto& request_handlers = server_session->domain_request_handlers;

-        for (auto& object : domain_objects) {
-            request_handlers.emplace_back(object);
-            dst_cmdbuf[domain_offset++] = static_cast<u32_le>(request_handlers.size());
+        for (const auto& object : domain_objects) {
+            server_session->AppendDomainRequestHandler(object);
+            dst_cmdbuf[domain_offset++] =
+                static_cast<u32_le>(server_session->NumDomainRequestHandlers());
        }
    }

--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -6,6 +6,7 @@

 #include <array>
 #include <memory>
+#include <optional>
 #include <string>
 #include <type_traits>
 #include <vector>
@@ -15,6 +16,8 @@
 #include "core/hle/ipc.h"
 #include "core/hle/kernel/object.h"

+union ResultCode;
+
 namespace Service {
 class ServiceFrameworkBase;
 }
@@ -166,12 +169,12 @@ public:
        return buffer_c_desciptors;
    }

-    const IPC::DomainMessageHeader* GetDomainMessageHeader() const {
-        return domain_message_header.get();
+    const IPC::DomainMessageHeader& GetDomainMessageHeader() const {
+        return domain_message_header.value();
    }

    bool HasDomainMessageHeader() const {
-        return domain_message_header != nullptr;
+        return domain_message_header.has_value();
    }

    /// Helper function to read a buffer using the appropriate buffer descriptor
@@ -208,14 +211,12 @@ public:

    template <typename T>
    SharedPtr<T> GetCopyObject(std::size_t index) {
-        ASSERT(index < copy_objects.size());
-        return DynamicObjectCast<T>(copy_objects[index]);
+        return DynamicObjectCast<T>(copy_objects.at(index));
    }

    template <typename T>
    SharedPtr<T> GetMoveObject(std::size_t index) {
-        ASSERT(index < move_objects.size());
-        return DynamicObjectCast<T>(move_objects[index]);
+        return DynamicObjectCast<T>(move_objects.at(index));
    }

    void AddMoveObject(SharedPtr<Object> object) {
@@ -232,7 +233,7 @@ public:

    template <typename T>
    std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const {
-        return std::static_pointer_cast<T>(domain_request_handlers[index]);
+        return std::static_pointer_cast<T>(domain_request_handlers.at(index));
    }

    void SetDomainRequestHandlers(
@@ -272,10 +273,10 @@ private:
    boost::container::small_vector<SharedPtr<Object>, 8> copy_objects;
    boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects;

-    std::shared_ptr<IPC::CommandHeader> command_header;
-    std::shared_ptr<IPC::HandleDescriptorHeader> handle_descriptor_header;
-    std::shared_ptr<IPC::DataPayloadHeader> data_payload_header;
-    std::shared_ptr<IPC::DomainMessageHeader> domain_message_header;
+    std::optional<IPC::CommandHeader> command_header;
+    std::optional<IPC::HandleDescriptorHeader> handle_descriptor_header;
+    std::optional<IPC::DataPayloadHeader> data_payload_header;
+    std::optional<IPC::DomainMessageHeader> domain_message_header;
    std::vector<IPC::BufferDescriptorX> buffer_x_desciptors;
    std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors;
    std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors;
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -12,6 +12,7 @@

 #include "core/core.h"
 #include "core/core_timing.h"
+#include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/kernel.h"
@@ -86,11 +87,13 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
 }

 struct KernelCore::Impl {
-    void Initialize(KernelCore& kernel, Core::Timing::CoreTiming& core_timing) {
+    explicit Impl(Core::System& system) : address_arbiter{system}, system{system} {}
+
+    void Initialize(KernelCore& kernel) {
        Shutdown();

        InitializeSystemResourceLimit(kernel);
-        InitializeThreads(core_timing);
+        InitializeThreads();
    }

    void Shutdown() {
@@ -122,9 +125,9 @@ struct KernelCore::Impl {
        ASSERT(system_resource_limit->SetLimitValue(ResourceType::Sessions, 900).IsSuccess());
    }

-    void InitializeThreads(Core::Timing::CoreTiming& core_timing) {
+    void InitializeThreads() {
        thread_wakeup_event_type =
-            core_timing.RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
+            system.CoreTiming().RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
    }

    std::atomic<u32> next_object_id{0};
@@ -135,6 +138,8 @@ struct KernelCore::Impl {
    std::vector<SharedPtr<Process>> process_list;
    Process* current_process = nullptr;

+    Kernel::AddressArbiter address_arbiter;
+
    SharedPtr<ResourceLimit> system_resource_limit;

    Core::Timing::EventType* thread_wakeup_event_type = nullptr;
@@ -145,15 +150,18 @@ struct KernelCore::Impl {
    /// Map of named ports managed by the kernel, which can be retrieved using
    /// the ConnectToPort SVC.
    NamedPortTable named_ports;
+
+    // System context
+    Core::System& system;
 };

-KernelCore::KernelCore() : impl{std::make_unique<Impl>()} {}
+KernelCore::KernelCore(Core::System& system) : impl{std::make_unique<Impl>(system)} {}
 KernelCore::~KernelCore() {
    Shutdown();
 }

-void KernelCore::Initialize(Core::Timing::CoreTiming& core_timing) {
-    impl->Initialize(*this, core_timing);
+void KernelCore::Initialize() {
+    impl->Initialize(*this);
 }

 void KernelCore::Shutdown() {
@@ -184,6 +192,14 @@ const Process* KernelCore::CurrentProcess() const {
    return impl->current_process;
 }

+AddressArbiter& KernelCore::AddressArbiter() {
+    return impl->address_arbiter;
+}
+
+const AddressArbiter& KernelCore::AddressArbiter() const {
+    return impl->address_arbiter;
+}
+
 void KernelCore::AddNamedPort(std::string name, SharedPtr<ClientPort> port) {
    impl->named_ports.emplace(std::move(name), std::move(port));
 }
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -11,6 +11,10 @@
 template <typename T>
 class ResultVal;

+namespace Core {
+class System;
+}
+
 namespace Core::Timing {
 class CoreTiming;
 struct EventType;
@@ -18,6 +22,7 @@ struct EventType;

 namespace Kernel {

+class AddressArbiter;
 class ClientPort;
 class HandleTable;
 class Process;
@@ -30,7 +35,14 @@ private:
    using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>;

 public:
-    KernelCore();
+    /// Constructs an instance of the kernel using the given System
+    /// instance as a context for any necessary system-related state,
+    /// such as threads, CPU core state, etc.
+    ///
+    /// @post After execution of the constructor, the provided System
+    ///       object *must* outlive the kernel instance itself.
+    ///
+    explicit KernelCore(Core::System& system);
    ~KernelCore();

    KernelCore(const KernelCore&) = delete;
@@ -40,11 +52,7 @@ public:
    KernelCore& operator=(KernelCore&&) = delete;

    /// Resets the kernel to a clean slate for use.
-    ///
-    /// @param core_timing CoreTiming instance used to create any necessary
-    ///                    kernel-specific callback events.
-    ///
-    void Initialize(Core::Timing::CoreTiming& core_timing);
+    void Initialize();

    /// Clears all resources in use by the kernel instance.
    void Shutdown();
@@ -67,6 +75,12 @@ public:
    /// Retrieves a const pointer to the current process.
    const Process* CurrentProcess() const;

+    /// Provides a reference to the kernel's address arbiter.
+    Kernel::AddressArbiter& AddressArbiter();
+
+    /// Provides a const reference to the kernel's address arbiter.
+    const Kernel::AddressArbiter& AddressArbiter() const;
+
    /// Adds a port to the named port table
    void AddNamedPort(std::string name, SharedPtr<ClientPort> port);

--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -99,7 +99,13 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
    vm_manager.Reset(metadata.GetAddressSpaceType());

    const auto& caps = metadata.GetKernelCapabilities();
-    return capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
+    const auto capability_init_result =
+        capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
+    if (capability_init_result.IsError()) {
+        return capability_init_result;
+    }
+
+    return handle_table.SetSize(capabilities.GetHandleTableSize());
 }

 void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) {
--- a/src/core/hle/kernel/process_capability.cpp
+++ b/src/core/hle/kernel/process_capability.cpp
@@ -96,7 +96,7 @@ void ProcessCapabilities::InitializeForMetadatalessProcess() {
    interrupt_capabilities.set();

    // Allow using the maximum possible amount of handles
-    handle_table_size = static_cast<u32>(HandleTable::MAX_COUNT);
+    handle_table_size = static_cast<s32>(HandleTable::MAX_COUNT);

    // Allow all debugging capabilities.
    is_debuggable = true;
@@ -337,7 +337,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
        return ERR_RESERVED_VALUE;
    }

-    handle_table_size = (flags >> 16) & 0x3FF;
+    handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF);
    return RESULT_SUCCESS;
 }

--- a/src/core/hle/kernel/process_capability.h
+++ b/src/core/hle/kernel/process_capability.h
@@ -156,7 +156,7 @@ public:
    }

    /// Gets the number of total allowable handles for the process' handle table.
-    u32 GetHandleTableSize() const {
+    s32 GetHandleTableSize() const {
        return handle_table_size;
    }

@@ -252,7 +252,7 @@ private:
    u64 core_mask = 0;
    u64 priority_mask = 0;

-    u32 handle_table_size = 0;
+    s32 handle_table_size = 0;
    u32 kernel_version = 0;

    ProgramType program_type = ProgramType::SysModule;
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -63,42 +63,71 @@ void ServerSession::Acquire(Thread* thread) {
    pending_requesting_threads.pop_back();
 }

-ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) {
-    auto* const domain_message_header = context.GetDomainMessageHeader();
-    if (domain_message_header) {
-        // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs
-        context.SetDomainRequestHandlers(domain_request_handlers);
-
-        // If there is a DomainMessageHeader, then this is CommandType "Request"
-        const u32 object_id{context.GetDomainMessageHeader()->object_id};
-        switch (domain_message_header->command) {
-        case IPC::DomainMessageHeader::CommandType::SendMessage:
-            if (object_id > domain_request_handlers.size()) {
-                LOG_CRITICAL(IPC,
-                             "object_id {} is too big! This probably means a recent service call "
-                             "to {} needed to return a new interface!",
-                             object_id, name);
-                UNREACHABLE();
-                return RESULT_SUCCESS; // Ignore error if asserts are off
-            }
-            return domain_request_handlers[object_id - 1]->HandleSyncRequest(context);
-
-        case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
-            LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
-
-            domain_request_handlers[object_id - 1] = nullptr;
-
-            IPC::ResponseBuilder rb{context, 2};
-            rb.Push(RESULT_SUCCESS);
-            return RESULT_SUCCESS;
-        }
-        }
-
-        LOG_CRITICAL(IPC, "Unknown domain command={}",
-                     static_cast<int>(domain_message_header->command.Value()));
-        ASSERT(false);
+void ServerSession::ClientDisconnected() {
+    // Hold a local shared pointer to the HLE handler so that it stays alive for
+    // the entire call below: the handler's own ClientDisconnected() resets this
+    // session's hle_handler member while that call is still in progress.
+    std::shared_ptr<SessionRequestHandler> handler = hle_handler;
+    if (handler) {
+        // Note that after this returns, this server session's hle_handler is
+        // invalidated (set to null).
+        handler->ClientDisconnected(this);
    }

+    // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
+    // their WaitSynchronization result to 0xC920181A.
+
+    // Clean up the list of client threads with pending requests, they are unneeded now that the
+    // client endpoint is closed.
+    pending_requesting_threads.clear();
+    currently_handling = nullptr;
+}
+
+void ServerSession::AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler) {
+    domain_request_handlers.push_back(std::move(handler));
+}
+
+std::size_t ServerSession::NumDomainRequestHandlers() const {
+    return domain_request_handlers.size();
+}
+
+ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) {
+    if (!context.HasDomainMessageHeader()) {
+        return RESULT_SUCCESS;
+    }
+
+    // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs
+    context.SetDomainRequestHandlers(domain_request_handlers);
+
+    // With a DomainMessageHeader this is CommandType "Request"; object IDs are 1-based.
+    const auto& domain_message_header = context.GetDomainMessageHeader();
+    const u32 object_id{domain_message_header.object_id};
+    switch (domain_message_header.command) {
+    case IPC::DomainMessageHeader::CommandType::SendMessage:
+        if (object_id == 0 || object_id > domain_request_handlers.size()) {
+            LOG_CRITICAL(IPC,
+                         "object_id {} is too big! This probably means a recent service call "
+                         "to {} needed to return a new interface!",
+                         object_id, name);
+            UNREACHABLE();
+            return RESULT_SUCCESS; // Ignore error if asserts are off
+        }
+        return domain_request_handlers[object_id - 1]->HandleSyncRequest(context);
+
+    case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
+        LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
+        if (object_id != 0 && object_id <= domain_request_handlers.size()) {
+            domain_request_handlers[object_id - 1] = nullptr;
+        }
+        IPC::ResponseBuilder rb{context, 2};
+        rb.Push(RESULT_SUCCESS);
+        return RESULT_SUCCESS;
+    }
+    }
+
+    LOG_CRITICAL(IPC, "Unknown domain command={}",
+                 static_cast<int>(domain_message_header.command.Value()));
+    ASSERT(false);
    return RESULT_SUCCESS;
 }

--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -46,6 +46,14 @@ public:
        return HANDLE_TYPE;
    }

+    Session* GetParent() {
+        return parent.get();
+    }
+
+    const Session* GetParent() const {
+        return parent.get();
+    }
+
    using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>;

    /**
@@ -78,23 +86,16 @@ public:

    void Acquire(Thread* thread) override;

-    std::string name;                ///< The name of this session (optional)
-    std::shared_ptr<Session> parent; ///< The parent session, which links to the client endpoint.
-    std::shared_ptr<SessionRequestHandler>
-        hle_handler; ///< This session's HLE request handler (applicable when not a domain)
+    /// Called when a client disconnection occurs.
+    void ClientDisconnected();

-    /// This is the list of domain request handlers (after conversion to a domain)
-    std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers;
+    /// Adds a new domain request handler to the collection of request handlers within
+    /// this ServerSession instance.
+    void AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler);

-    /// List of threads that are pending a response after a sync request. This list is processed in
-    /// a LIFO manner, thus, the last request will be dispatched first.
-    /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
-    std::vector<SharedPtr<Thread>> pending_requesting_threads;
-
-    /// Thread whose request is currently being handled. A request is considered "handled" when a
-    /// response is sent via svcReplyAndReceive.
-    /// TODO(Subv): Find a better name for this.
-    SharedPtr<Thread> currently_handling;
+    /// Retrieves the total number of domain request handlers that have been
+    /// appended to this ServerSession instance.
+    std::size_t NumDomainRequestHandlers() const;

    /// Returns true if the session has been converted to a domain, otherwise False
    bool IsDomain() const {
@@ -129,8 +130,30 @@ private:
    /// object handle.
    ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context);

+    /// The parent session, which links to the client endpoint.
+    std::shared_ptr<Session> parent;
+
+    /// This session's HLE request handler (applicable when not a domain)
+    std::shared_ptr<SessionRequestHandler> hle_handler;
+
+    /// This is the list of domain request handlers (after conversion to a domain)
+    std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers;
+
+    /// List of threads that are pending a response after a sync request. This list is processed in
+    /// a LIFO manner, thus, the last request will be dispatched first.
+    /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
+    std::vector<SharedPtr<Thread>> pending_requesting_threads;
+
+    /// Thread whose request is currently being handled. A request is considered "handled" when a
+    /// response is sent via svcReplyAndReceive.
+    /// TODO(Subv): Find a better name for this.
+    SharedPtr<Thread> currently_handling;
+
    /// When set to True, converts the session to a domain at the end of the command
    bool convert_to_domain{};
+
+    /// The name of this session (optional)
+    std::string name;
 };

 } // namespace Kernel
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -6,7 +6,6 @@

 #include "common/assert.h"
 #include "common/logging/log.h"
-#include "core/core.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/shared_memory.h"
@@ -34,8 +33,8 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
        shared_memory->backing_block_offset = 0;

        // Refresh the address mappings for the current process.
-        if (Core::CurrentProcess() != nullptr) {
-            Core::CurrentProcess()->VMManager().RefreshMemoryBlockMappings(
+        if (kernel.CurrentProcess() != nullptr) {
+            kernel.CurrentProcess()->VMManager().RefreshMemoryBlockMappings(
                shared_memory->backing_block.get());
        }
    } else {
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -20,6 +20,7 @@
 #include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
+#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/mutex.h"
@@ -47,23 +48,6 @@ constexpr bool IsValidAddressRange(VAddr address, u64 size) {
    return address + size > address;
 }

-// Checks if a given address range lies within a larger address range.
-constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
-                                    VAddr address_range_end) {
-    const VAddr end_address = address + size - 1;
-    return address_range_begin <= address && end_address <= address_range_end - 1;
-}
-
-bool IsInsideAddressSpace(const VMManager& vm, VAddr address, u64 size) {
-    return IsInsideAddressRange(address, size, vm.GetAddressSpaceBaseAddress(),
-                                vm.GetAddressSpaceEndAddress());
-}
-
-bool IsInsideNewMapRegion(const VMManager& vm, VAddr address, u64 size) {
-    return IsInsideAddressRange(address, size, vm.GetNewMapRegionBaseAddress(),
-                                vm.GetNewMapRegionEndAddress());
-}
-
 // 8 GiB
 constexpr u64 MAIN_MEMORY_SIZE = 0x200000000;

@@ -105,14 +89,14 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add
        return ERR_INVALID_ADDRESS_STATE;
    }

-    if (!IsInsideAddressSpace(vm_manager, src_addr, size)) {
+    if (!vm_manager.IsWithinAddressSpace(src_addr, size)) {
        LOG_ERROR(Kernel_SVC,
                  "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}",
                  src_addr, size);
        return ERR_INVALID_ADDRESS_STATE;
    }

-    if (!IsInsideNewMapRegion(vm_manager, dst_addr, size)) {
+    if (!vm_manager.IsWithinNewMapRegion(dst_addr, size)) {
        LOG_ERROR(Kernel_SVC,
                  "Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}",
                  dst_addr, size);
@@ -238,7 +222,7 @@ static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) {
    auto* const current_process = Core::CurrentProcess();
    auto& vm_manager = current_process->VMManager();

-    if (!IsInsideAddressSpace(vm_manager, addr, size)) {
+    if (!vm_manager.IsWithinAddressSpace(addr, size)) {
        LOG_ERROR(Kernel_SVC,
                  "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
                  size);
@@ -299,7 +283,7 @@ static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attr
    }

    auto& vm_manager = Core::CurrentProcess()->VMManager();
-    if (!IsInsideAddressSpace(vm_manager, address, size)) {
+    if (!vm_manager.IsWithinAddressSpace(address, size)) {
        LOG_ERROR(Kernel_SVC,
                  "Given address (0x{:016X}) is outside the bounds of the address space.", address);
        return ERR_INVALID_ADDRESS_STATE;
@@ -1495,13 +1479,14 @@ static ResultCode WaitForAddress(VAddr address, u32 type, s32 value, s64 timeout
        return ERR_INVALID_ADDRESS;
    }

+    auto& address_arbiter = Core::System::GetInstance().Kernel().AddressArbiter();
    switch (static_cast<AddressArbiter::ArbitrationType>(type)) {
    case AddressArbiter::ArbitrationType::WaitIfLessThan:
-        return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, false);
+        return address_arbiter.WaitForAddressIfLessThan(address, value, timeout, false);
    case AddressArbiter::ArbitrationType::DecrementAndWaitIfLessThan:
-        return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, true);
+        return address_arbiter.WaitForAddressIfLessThan(address, value, timeout, true);
    case AddressArbiter::ArbitrationType::WaitIfEqual:
-        return AddressArbiter::WaitForAddressIfEqual(address, value, timeout);
+        return address_arbiter.WaitForAddressIfEqual(address, value, timeout);
    default:
        LOG_ERROR(Kernel_SVC,
                  "Invalid arbitration type, expected WaitIfLessThan, DecrementAndWaitIfLessThan "
@@ -1526,13 +1511,14 @@ static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to
        return ERR_INVALID_ADDRESS;
    }

+    auto& address_arbiter = Core::System::GetInstance().Kernel().AddressArbiter();
    switch (static_cast<AddressArbiter::SignalType>(type)) {
    case AddressArbiter::SignalType::Signal:
-        return AddressArbiter::SignalToAddress(address, num_to_wake);
+        return address_arbiter.SignalToAddress(address, num_to_wake);
    case AddressArbiter::SignalType::IncrementAndSignalIfEqual:
-        return AddressArbiter::IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
+        return address_arbiter.IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
    case AddressArbiter::SignalType::ModifyByWaitingCountAndSignalIfEqual:
-        return AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(address, value,
+        return address_arbiter.ModifyByWaitingCountAndSignalToAddressIfEqual(address, value,
                                                                             num_to_wake);
    default:
        LOG_ERROR(Kernel_SVC,
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -184,8 +184,6 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
        return ERR_INVALID_PROCESSOR_ID;
    }

-    // TODO(yuriks): Other checks, returning 0xD9001BEA
-
    if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) {
        LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point);
        // TODO (bunnei): Find the correct error code to use here
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -17,8 +17,8 @@
 #include "core/memory_setup.h"

 namespace Kernel {
-
-static const char* GetMemoryStateName(MemoryState state) {
+namespace {
+const char* GetMemoryStateName(MemoryState state) {
    static constexpr const char* names[] = {
        "Unmapped",         "Io",
        "Normal",           "CodeStatic",
@@ -35,6 +35,14 @@ static const char* GetMemoryStateName(MemoryState state) {
    return names[ToSvcMemoryState(state)];
 }

+// Checks if a given address range lies within a larger one (end exclusive); wrapped ranges fail.
+constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
+                                    VAddr address_range_end) {
+    const bool wraps = size != 0 && address + size - 1 < address; // range overflows VAddr
+    return !wraps && address_range_begin <= address && address + size - 1 <= address_range_end - 1;
+}
+} // Anonymous namespace
+
 bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
    ASSERT(base + size == next.base);
    if (permissions != next.permissions || state != next.state || attribute != next.attribute ||
@@ -249,8 +257,7 @@ ResultCode VMManager::ReprotectRange(VAddr target, u64 size, VMAPermission new_p
 }

 ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission perms) {
-    if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() ||
-        target + size < target) {
+    if (!IsWithinHeapRegion(target, size)) {
        return ERR_INVALID_ADDRESS;
    }

@@ -285,8 +292,7 @@ ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission p
 }

 ResultCode VMManager::HeapFree(VAddr target, u64 size) {
-    if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() ||
-        target + size < target) {
+    if (!IsWithinHeapRegion(target, size)) {
        return ERR_INVALID_ADDRESS;
    }

@@ -706,6 +712,11 @@ u64 VMManager::GetAddressSpaceWidth() const {
    return address_space_width;
 }

+bool VMManager::IsWithinAddressSpace(VAddr address, u64 size) const {
+    const VAddr base = GetAddressSpaceBaseAddress(), end = GetAddressSpaceEndAddress();
+    return IsInsideAddressRange(address, size, base, end); // end is exclusive
+}
+
 VAddr VMManager::GetASLRRegionBaseAddress() const {
    return aslr_region_base;
 }
@@ -750,6 +761,11 @@ u64 VMManager::GetCodeRegionSize() const {
    return code_region_end - code_region_base;
 }

+bool VMManager::IsWithinCodeRegion(VAddr address, u64 size) const {
+    const VAddr base = GetCodeRegionBaseAddress(), end = GetCodeRegionEndAddress();
+    return IsInsideAddressRange(address, size, base, end); // end is exclusive
+}
+
 VAddr VMManager::GetHeapRegionBaseAddress() const {
    return heap_region_base;
 }
@@ -762,6 +778,11 @@ u64 VMManager::GetHeapRegionSize() const {
    return heap_region_end - heap_region_base;
 }

+bool VMManager::IsWithinHeapRegion(VAddr address, u64 size) const {
+    const VAddr base = GetHeapRegionBaseAddress(), end = GetHeapRegionEndAddress();
+    return IsInsideAddressRange(address, size, base, end); // end is exclusive
+}
+
 VAddr VMManager::GetMapRegionBaseAddress() const {
    return map_region_base;
 }
@@ -774,6 +795,10 @@ u64 VMManager::GetMapRegionSize() const {
    return map_region_end - map_region_base;
 }

+bool VMManager::IsWithinMapRegion(VAddr address, u64 size) const { // true iff the range fits
+    return IsInsideAddressRange(address, size, GetMapRegionBaseAddress(), GetMapRegionEndAddress());
+}
+
 VAddr VMManager::GetNewMapRegionBaseAddress() const {
    return new_map_region_base;
 }
@@ -786,6 +811,11 @@ u64 VMManager::GetNewMapRegionSize() const {
    return new_map_region_end - new_map_region_base;
 }

+bool VMManager::IsWithinNewMapRegion(VAddr address, u64 size) const {
+    const VAddr base = GetNewMapRegionBaseAddress(), end = GetNewMapRegionEndAddress();
+    return IsInsideAddressRange(address, size, base, end); // end is exclusive
+}
+
 VAddr VMManager::GetTLSIORegionBaseAddress() const {
    return tls_io_region_base;
 }
@@ -798,4 +828,9 @@ u64 VMManager::GetTLSIORegionSize() const {
    return tls_io_region_end - tls_io_region_base;
 }

+bool VMManager::IsWithinTLSIORegion(VAddr address, u64 size) const {
+    const VAddr base = GetTLSIORegionBaseAddress(), end = GetTLSIORegionEndAddress();
+    return IsInsideAddressRange(address, size, base, end); // end is exclusive
+}
+
 } // namespace Kernel
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -432,18 +432,21 @@ public:
    /// Gets the address space width in bits.
    u64 GetAddressSpaceWidth() const;

+    /// Determines whether or not the given address range lies within the address space.
+    bool IsWithinAddressSpace(VAddr address, u64 size) const;
+
    /// Gets the base address of the ASLR region.
    VAddr GetASLRRegionBaseAddress() const;

    /// Gets the end address of the ASLR region.
    VAddr GetASLRRegionEndAddress() const;

-    /// Determines whether or not the specified address range is within the ASLR region.
-    bool IsWithinASLRRegion(VAddr address, u64 size) const;
-
    /// Gets the size of the ASLR region
    u64 GetASLRRegionSize() const;

+    /// Determines whether or not the specified address range is within the ASLR region.
+    bool IsWithinASLRRegion(VAddr address, u64 size) const;
+
    /// Gets the base address of the code region.
    VAddr GetCodeRegionBaseAddress() const;

@@ -453,6 +456,9 @@ public:
    /// Gets the total size of the code region in bytes.
    u64 GetCodeRegionSize() const;

+    /// Determines whether or not the specified range is within the code region.
+    bool IsWithinCodeRegion(VAddr address, u64 size) const;
+
    /// Gets the base address of the heap region.
    VAddr GetHeapRegionBaseAddress() const;

@@ -462,6 +468,9 @@ public:
    /// Gets the total size of the heap region in bytes.
    u64 GetHeapRegionSize() const;

+    /// Determines whether or not the specified range is within the heap region.
+    bool IsWithinHeapRegion(VAddr address, u64 size) const;
+
    /// Gets the base address of the map region.
    VAddr GetMapRegionBaseAddress() const;

@@ -471,6 +480,9 @@ public:
    /// Gets the total size of the map region in bytes.
    u64 GetMapRegionSize() const;

+    /// Determines whether or not the specified range is within the map region.
+    bool IsWithinMapRegion(VAddr address, u64 size) const;
+
    /// Gets the base address of the new map region.
    VAddr GetNewMapRegionBaseAddress() const;

@@ -480,6 +492,9 @@ public:
    /// Gets the total size of the new map region in bytes.
    u64 GetNewMapRegionSize() const;

+    /// Determines whether or not the given address range is within the new map region.
+    bool IsWithinNewMapRegion(VAddr address, u64 size) const;
+
    /// Gets the base address of the TLS IO region.
    VAddr GetTLSIORegionBaseAddress() const;

@@ -489,6 +504,9 @@ public:
    /// Gets the total size of the TLS IO region in bytes.
    u64 GetTLSIORegionSize() const;

+    /// Determines if the given address range is within the TLS IO region.
+    bool IsWithinTLSIORegion(VAddr address, u64 size) const;
+
    /// Each VMManager has its own page table, which is set as the main one when the owning process
    /// is scheduled.
    Memory::PageTable page_table;
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -8,7 +8,6 @@
 #include <utility>
 #include "common/assert.h"
 #include "common/bit_field.h"
-#include "common/common_funcs.h"
 #include "common/common_types.h"

 // All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -7,6 +7,7 @@
 #include "common/string_util.h"
 #include "core/core.h"
 #include "core/frontend/applets/software_keyboard.h"
+#include "core/hle/result.h"
 #include "core/hle/service/am/am.h"
 #include "core/hle/service/am/applets/software_keyboard.h"

--- a/src/core/hle/service/am/applets/software_keyboard.h
+++ b/src/core/hle/service/am/applets/software_keyboard.h
@@ -9,10 +9,13 @@
 #include <vector>

 #include "common/common_funcs.h"
+#include "common/common_types.h"
 #include "common/swap.h"
 #include "core/hle/service/am/am.h"
 #include "core/hle/service/am/applets/applets.h"

+union ResultCode;
+
 namespace Service::AM::Applets {

 enum class KeysetDisable : u32 {
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -18,17 +18,11 @@
 #include "core/hle/kernel/readable_event.h"
 #include "core/hle/kernel/writable_event.h"
 #include "core/hle/service/audio/audout_u.h"
+#include "core/hle/service/audio/errors.h"
 #include "core/memory.h"

 namespace Service::Audio {

-namespace ErrCodes {
-enum {
-    ErrorUnknown = 2,
-    BufferCountExceeded = 8,
-};
-}
-
 constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}};
 constexpr int DefaultSampleRate{48000};

@@ -100,7 +94,7 @@ private:

        if (stream->IsPlaying()) {
            IPC::ResponseBuilder rb{ctx, 2};
-            rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::ErrorUnknown));
+            rb.Push(ERR_OPERATION_FAILED);
            return;
        }

@@ -113,7 +107,9 @@ private:
    void StopAudioOut(Kernel::HLERequestContext& ctx) {
        LOG_DEBUG(Service_Audio, "called");

-        audio_core.StopStream(stream);
+        if (stream->IsPlaying()) {
+            audio_core.StopStream(stream);
+        }

        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(RESULT_SUCCESS);
@@ -143,7 +139,8 @@ private:

        if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) {
            IPC::ResponseBuilder rb{ctx, 2};
-            rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::BufferCountExceeded));
+            rb.Push(ERR_BUFFER_COUNT_EXCEEDED);
+            return;
        }

        IPC::ResponseBuilder rb{ctx, 2};
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -17,6 +17,7 @@
 #include "core/hle/kernel/readable_event.h"
 #include "core/hle/kernel/writable_event.h"
 #include "core/hle/service/audio/audren_u.h"
+#include "core/hle/service/audio/errors.h"

 namespace Service::Audio {

@@ -37,7 +38,7 @@ public:
            {8, &IAudioRenderer::SetRenderingTimeLimit, "SetRenderingTimeLimit"},
            {9, &IAudioRenderer::GetRenderingTimeLimit, "GetRenderingTimeLimit"},
            {10, &IAudioRenderer::RequestUpdateImpl, "RequestUpdateAuto"},
-            {11, nullptr, "ExecuteAudioRendererRendering"},
+            {11, &IAudioRenderer::ExecuteAudioRendererRendering, "ExecuteAudioRendererRendering"},
        };
        // clang-format on
        RegisterHandlers(functions);
@@ -138,6 +139,17 @@ private:
        rb.Push(rendering_time_limit_percent);
    }

+    void ExecuteAudioRendererRendering(Kernel::HLERequestContext& ctx) {
+        LOG_DEBUG(Service_Audio, "called");
+
+        // The command is only known to report an unsupported-operation error
+        // code or to abort, so no work is attempted here and the error code
+        // is what every caller receives.
+
+        IPC::ResponseBuilder response{ctx, 2};
+        response.Push(ERR_NOT_SUPPORTED);
+    }
+
    Kernel::EventPair system_event;
    std::unique_ptr<AudioCore::AudioRenderer> renderer;
    u32 rendering_time_limit_percent = 100;
@@ -235,7 +247,7 @@ AudRenU::AudRenU() : ServiceFramework("audren:u") {
        {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"},
        {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"},
        {2, &AudRenU::GetAudioDeviceService, "GetAudioDeviceService"},
-        {3, nullptr, "OpenAudioRendererAuto"},
+        {3, &AudRenU::OpenAudioRendererAuto, "OpenAudioRendererAuto"},
        {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, "GetAudioDeviceServiceWithRevisionInfo"},
    };
    // clang-format on
@@ -248,12 +260,7 @@ AudRenU::~AudRenU() = default;
 void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) {
    LOG_DEBUG(Service_Audio, "called");

-    IPC::RequestParser rp{ctx};
-    auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
-    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
-
-    rb.Push(RESULT_SUCCESS);
-    rb.PushIpcInterface<Audio::IAudioRenderer>(std::move(params));
+    OpenAudioRendererImpl(ctx); // Implementation shared with OpenAudioRendererAuto.
 }

 void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
@@ -262,20 +269,20 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
    LOG_DEBUG(Service_Audio, "called");

    u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
-    buffer_sz += params.unknown_c * 1024;
-    buffer_sz += 0x940 * (params.unknown_c + 1);
+    buffer_sz += params.submix_count * 1024;
+    buffer_sz += 0x940 * (params.submix_count + 1);
    buffer_sz += 0x3F0 * params.voice_count;
-    buffer_sz += Common::AlignUp(8 * (params.unknown_c + 1), 0x10);
+    buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10);
    buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
-    buffer_sz +=
-        Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) *
-                            (params.mix_buffer_count + 6),
-                        0x40);
+    buffer_sz += Common::AlignUp(
+        (0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) *
+            (params.mix_buffer_count + 6),
+        0x40);

    if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
-        u32 count = params.unknown_c + 1;
+        const u32 count = params.submix_count + 1;
        u64 node_count = Common::AlignUp(count, 0x40);
-        u64 node_state_buffer_sz =
+        const u64 node_state_buffer_sz =
            4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
        u64 edge_matrix_buffer_sz = 0;
        node_count = Common::AlignUp(count * count, 0x40);
@@ -289,19 +296,19 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {

    buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
    if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
-        buffer_sz += 0xE0 * params.unknown_2c;
+        buffer_sz += 0xE0 * params.num_splitter_send_channels;
        buffer_sz += 0x20 * params.splitter_count;
-        buffer_sz += Common::AlignUp(4 * params.unknown_2c, 0x10);
+        buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10);
    }
    buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
    u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
                    ((params.voice_count * 256) | 0x40);

-    if (params.unknown_1c >= 1) {
+    if (params.performance_frame_count >= 1) {
        output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
                                      16 * params.voice_count + 16) +
                                     0x658) *
-                                            (params.unknown_1c + 1) +
+                                            (params.performance_frame_count + 1) +
                                        0xc0,
                                    0x40) +
                    output_sz;
@@ -325,6 +332,12 @@ void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
    rb.PushIpcInterface<Audio::IAudioDevice>();
 }

+void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_Audio, "called");
+    // Handled the same way as OpenAudioRenderer: both forward to the shared implementation.
+    OpenAudioRendererImpl(ctx);
+}
+
 void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) {
    LOG_WARNING(Service_Audio, "(STUBBED) called");

@@ -335,6 +348,15 @@ void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& c
                                                // based on the current revision
 }

+void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser parser{ctx};
+    const auto renderer_params = parser.PopRaw<AudioCore::AudioRendererParameter>();
+    IPC::ResponseBuilder response{ctx, 2, 0, 1};
+    // Reply with success plus a new IAudioRenderer interface built from the request parameters.
+    response.Push(RESULT_SUCCESS);
+    response.PushIpcInterface<IAudioRenderer>(renderer_params);
+}
+
 bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
    u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
    switch (feature) {
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -21,8 +21,11 @@ private:
    void OpenAudioRenderer(Kernel::HLERequestContext& ctx);
    void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx);
    void GetAudioDeviceService(Kernel::HLERequestContext& ctx);
+    void OpenAudioRendererAuto(Kernel::HLERequestContext& ctx);
    void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx);

+    void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx);
+
    enum class AudioFeatures : u32 {
        Splitter,
    };
--- a/src/core/hle/service/audio/errors.h
+++ b/src/core/hle/service/audio/errors.h
@@ -0,0 +1,15 @@
+// Copyright 2019 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "core/hle/result.h"
+
+namespace Service::Audio {
+// Result codes in the Audio error module, pushed by audout_u.cpp and audren_u.cpp.
+constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::Audio, 2}; // was ErrCodes::ErrorUnknown
+constexpr ResultCode ERR_BUFFER_COUNT_EXCEEDED{ErrorModule::Audio, 8}; // QueueBuffer refused
+constexpr ResultCode ERR_NOT_SUPPORTED{ErrorModule::Audio, 513}; // unsupported operation
+
+} // namespace Service::Audio
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -15,7 +15,7 @@ namespace Kernel {
 class SharedMemory;
 }

-namespace SM {
+namespace Service::SM {
 class ServiceManager;
 }

--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -23,7 +23,7 @@ u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector

 void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
                        u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
-                        const MathUtil::Rectangle<int>& crop_rect) {
+                        const Common::Rectangle<int>& crop_rect) {
    VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
    LOG_TRACE(Service,
              "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
@@ -36,7 +36,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3

    auto& instance = Core::System::GetInstance();
    instance.GetPerfStats().EndGameFrame();
-    instance.Renderer().SwapBuffers(framebuffer);
+    instance.GPU().SwapBuffers(framebuffer);
 }

 } // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -25,7 +25,7 @@ public:
    /// Performs a screen flip, drawing the buffer pointed to by the handle.
    void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
              NVFlinger::BufferQueue::BufferTransformFlags transform,
-              const MathUtil::Rectangle<int>& crop_rect);
+              const Common::Rectangle<int>& crop_rect);

 private:
    std::shared_ptr<nvmap> nvmap_dev;
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -178,7 +178,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
    auto& gpu = system_instance.GPU();
    auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
    ASSERT(cpu_addr);
-    system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
+    gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size);

    params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);

--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -136,16 +136,6 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
    return 0;
 }

-static void PushGPUEntries(Tegra::CommandList&& entries) {
-    if (entries.empty()) {
-        return;
-    }
-
-    auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()};
-    dma_pusher.Push(std::move(entries));
-    dma_pusher.DispatchCalls();
-}
-
 u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
    if (input.size() < sizeof(IoctlSubmitGpfifo)) {
        UNIMPLEMENTED();
@@ -163,7 +153,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
    std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
                params.num_entries * sizeof(Tegra::CommandListHeader));

-    PushGPUEntries(std::move(entries));
+    Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));

    params.fence_out.id = 0;
    params.fence_out.value = 0;
@@ -184,7 +174,7 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
    Memory::ReadBlock(params.address, entries.data(),
                      params.num_entries * sizeof(Tegra::CommandListHeader));

-    PushGPUEntries(std::move(entries));
+    Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));

    params.fence_out.id = 0;
    params.fence_out.value = 0;
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -63,7 +63,7 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
 }

 void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
-                              const MathUtil::Rectangle<int>& crop_rect) {
+                              const Common::Rectangle<int>& crop_rect) {
    auto itr = std::find_if(queue.begin(), queue.end(),
                            [&](const Buffer& buffer) { return buffer.slot == slot; });
    ASSERT(itr != queue.end());
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -67,14 +67,14 @@ public:
        Status status = Status::Free;
        IGBPBuffer igbp_buffer;
        BufferTransformFlags transform;
-        MathUtil::Rectangle<int> crop_rect;
+        Common::Rectangle<int> crop_rect;
    };

    void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer);
    std::optional<u32> DequeueBuffer(u32 width, u32 height);
    const IGBPBuffer& RequestBuffer(u32 slot) const;
    void QueueBuffer(u32 slot, BufferTransformFlags transform,
-                     const MathUtil::Rectangle<int>& crop_rect);
+                     const Common::Rectangle<int>& crop_rect);
    std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
    void ReleaseBuffer(u32 slot);
    u32 Query(QueryType type);
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -186,7 +186,7 @@ void NVFlinger::Compose() {

            // There was no queued buffer to draw, render previous frame
            system_instance.GetPerfStats().EndGameFrame();
-            system_instance.Renderer().SwapBuffers({});
+            system_instance.GPU().SwapBuffers({});
            continue;
        }

--- a/src/core/hle/service/sm/controller.cpp
+++ b/src/core/hle/service/sm/controller.cpp
@@ -30,7 +30,7 @@ void Controller::DuplicateSession(Kernel::HLERequestContext& ctx) {

    IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
    rb.Push(RESULT_SUCCESS);
-    Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->parent->client};
+    Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->GetParent()->client};
    rb.PushMoveObjects(session);

    LOG_DEBUG(Service, "session={}", session->GetObjectId());
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -420,7 +420,7 @@ public:
        u32_le fence_is_valid;
        std::array<Fence, 2> fences;

-        MathUtil::Rectangle<int> GetCropRect() const {
+        Common::Rectangle<int> GetCropRect() const {
            return {crop_left, crop_top, crop_right, crop_bottom};
        }
    };
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -71,15 +71,20 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa
                                 FlushMode::FlushAndInvalidate);

    VAddr end = base + size;
-    while (base != end) {
-        ASSERT_MSG(base < page_table.pointers.size(), "out of range mapping at {:016X}", base);
+    ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
+               base + page_table.pointers.size());

-        page_table.attributes[base] = type;
-        page_table.pointers[base] = memory;
+    std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);

-        base += 1;
-        if (memory != nullptr)
+    if (memory == nullptr) {
+        std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
+    } else {
+        while (base != end) {
+            page_table.pointers[base] = memory;
+
+            base += 1;
            memory += PAGE_SIZE;
+        }
    }
 }

@@ -166,9 +171,6 @@ T Read(const VAddr vaddr) {
        return value;
    }

-    // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
-
    PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
    switch (type) {
    case PageType::Unmapped:
@@ -199,9 +201,6 @@ void Write(const VAddr vaddr, const T data) {
        return;
    }

-    // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state
-    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
-
    PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
    switch (type) {
    case PageType::Unmapped:
@@ -357,16 +356,16 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
        const VAddr overlap_end = std::min(end, region_end);
        const VAddr overlap_size = overlap_end - overlap_start;

-        auto& rasterizer = system_instance.Renderer().Rasterizer();
+        auto& gpu = system_instance.GPU();
        switch (mode) {
        case FlushMode::Flush:
-            rasterizer.FlushRegion(overlap_start, overlap_size);
+            gpu.FlushRegion(overlap_start, overlap_size);
            break;
        case FlushMode::Invalidate:
-            rasterizer.InvalidateRegion(overlap_start, overlap_size);
+            gpu.InvalidateRegion(overlap_start, overlap_size);
            break;
        case FlushMode::FlushAndInvalidate:
-            rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size);
+            gpu.FlushAndInvalidateRegion(overlap_start, overlap_size);
            break;
        }
    };
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -393,6 +393,7 @@ struct Values {
    u16 frame_limit;
    bool use_disk_shader_cache;
    bool use_accurate_gpu_emulation;
+    bool use_asynchronous_gpu_emulation;

    float bg_red;
    float bg_green;
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -162,6 +162,8 @@ TelemetrySession::TelemetrySession() {
             Settings::values.use_disk_shader_cache);
    AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
             Settings::values.use_accurate_gpu_emulation);
+    AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAsynchronousGpuEmulation",
+             Settings::values.use_asynchronous_gpu_emulation);
    AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
             Settings::values.use_docked_mode);
 }
--- a/src/input_common/motion_emu.cpp
+++ b/src/input_common/motion_emu.cpp
@@ -32,12 +32,12 @@ public:
    }

    void BeginTilt(int x, int y) {
-        mouse_origin = Math::MakeVec(x, y);
+        mouse_origin = Common::MakeVec(x, y);
        is_tilting = true;
    }

    void Tilt(int x, int y) {
-        auto mouse_move = Math::MakeVec(x, y) - mouse_origin;
+        auto mouse_move = Common::MakeVec(x, y) - mouse_origin;
        if (is_tilting) {
            std::lock_guard<std::mutex> guard(tilt_mutex);
            if (mouse_move.x == 0 && mouse_move.y == 0) {
@@ -45,7 +45,7 @@ public:
            } else {
                tilt_direction = mouse_move.Cast<float>();
                tilt_angle =
-                    std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, MathUtil::PI * 0.5f);
+                    std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, Common::PI * 0.5f);
            }
        }
    }
@@ -56,7 +56,7 @@ public:
        is_tilting = false;
    }

-    std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() {
+    std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() {
        std::lock_guard<std::mutex> guard(status_mutex);
        return status;
    }
@@ -66,17 +66,17 @@ private:
    const std::chrono::steady_clock::duration update_duration;
    const float sensitivity;

-    Math::Vec2<int> mouse_origin;
+    Common::Vec2<int> mouse_origin;

    std::mutex tilt_mutex;
-    Math::Vec2<float> tilt_direction;
+    Common::Vec2<float> tilt_direction;
    float tilt_angle = 0;

    bool is_tilting = false;

    Common::Event shutdown_event;

-    std::tuple<Math::Vec3<float>, Math::Vec3<float>> status;
+    std::tuple<Common::Vec3<float>, Common::Vec3<float>> status;
    std::mutex status_mutex;

    // Note: always keep the thread declaration at the end so that other objects are initialized
@@ -85,8 +85,8 @@ private:

    void MotionEmuThread() {
        auto update_time = std::chrono::steady_clock::now();
-        Math::Quaternion<float> q = MakeQuaternion(Math::Vec3<float>(), 0);
-        Math::Quaternion<float> old_q;
+        Common::Quaternion<float> q = Common::MakeQuaternion(Common::Vec3<float>(), 0);
+        Common::Quaternion<float> old_q;

        while (!shutdown_event.WaitUntil(update_time)) {
            update_time += update_duration;
@@ -96,18 +96,18 @@ private:
                std::lock_guard<std::mutex> guard(tilt_mutex);

                // Find the quaternion describing current 3DS tilting
-                q = MakeQuaternion(Math::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x),
-                                   tilt_angle);
+                q = Common::MakeQuaternion(
+                    Common::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), tilt_angle);
            }

            auto inv_q = q.Inverse();

            // Set the gravity vector in world space
-            auto gravity = Math::MakeVec(0.0f, -1.0f, 0.0f);
+            auto gravity = Common::MakeVec(0.0f, -1.0f, 0.0f);

            // Find the angular rate vector in world space
            auto angular_rate = ((q - old_q) * inv_q).xyz * 2;
-            angular_rate *= 1000 / update_millisecond / MathUtil::PI * 180;
+            angular_rate *= 1000 / update_millisecond / Common::PI * 180;

            // Transform the two vectors from world space to 3DS space
            gravity = QuaternionRotate(inv_q, gravity);
@@ -131,7 +131,7 @@ public:
        device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity);
    }

-    std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() const override {
+    std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const override {
        return device->GetStatus();
    }

--- a/src/tests/core/arm/arm_test_common.cpp
+++ b/src/tests/core/arm/arm_test_common.cpp
@@ -13,11 +13,11 @@
 namespace ArmTests {

 TestEnvironment::TestEnvironment(bool mutable_memory_)
-    : mutable_memory(mutable_memory_), test_memory(std::make_shared<TestMemory>(this)) {
-
+    : mutable_memory(mutable_memory_),
+      test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} {
    auto process = Kernel::Process::Create(kernel, "");
    kernel.MakeCurrentProcess(process.get());
-    page_table = &Core::CurrentProcess()->VMManager().page_table;
+    page_table = &process->VMManager().page_table;

    std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
    page_table->special_regions.clear();
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -17,6 +17,12 @@ add_library(video_core STATIC
    engines/shader_header.h
    gpu.cpp
    gpu.h
+    gpu_asynch.cpp
+    gpu_asynch.h
+    gpu_synch.cpp
+    gpu_synch.h
+    gpu_thread.cpp
+    gpu_thread.h
    macro_interpreter.cpp
    macro_interpreter.h
    memory_manager.cpp
@@ -106,6 +112,10 @@ add_library(video_core STATIC
 if (ENABLE_VULKAN)
    target_sources(video_core PRIVATE
        renderer_vulkan/declarations.h
+        renderer_vulkan/maxwell_to_vk.cpp
+        renderer_vulkan/maxwell_to_vk.h
+        renderer_vulkan/vk_buffer_cache.cpp
+        renderer_vulkan/vk_buffer_cache.h
        renderer_vulkan/vk_device.cpp
        renderer_vulkan/vk_device.h
        renderer_vulkan/vk_memory_manager.cpp
@@ -113,7 +123,9 @@ if (ENABLE_VULKAN)
        renderer_vulkan/vk_resource_manager.cpp
        renderer_vulkan/vk_resource_manager.h
        renderer_vulkan/vk_scheduler.cpp
-        renderer_vulkan/vk_scheduler.h)
+        renderer_vulkan/vk_scheduler.h
+        renderer_vulkan/vk_stream_buffer.cpp
+        renderer_vulkan/vk_stream_buffer.h)

    target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
    target_compile_definitions(video_core PRIVATE HAS_VULKAN)
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -39,7 +39,7 @@ bool DmaPusher::Step() {
    }

    const CommandList& command_list{dma_pushbuffer.front()};
-    const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
+    const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
    GPUVAddr dma_get = command_list_header.addr;
    GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
    bool non_main = command_list_header.is_non_main;
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -2,12 +2,11 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include "core/core.h"
-#include "core/memory.h"
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/math_util.h"
 #include "video_core/engines/fermi_2d.h"
-#include "video_core/engines/maxwell_3d.h"
 #include "video_core/rasterizer_interface.h"
-#include "video_core/textures/decoders.h"

 namespace Tegra::Engines {

@@ -44,10 +43,10 @@ void Fermi2D::HandleSurfaceCopy() {
    const u32 src_blit_y2{
        static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)};

-    const MathUtil::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
-    const MathUtil::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
-                                            regs.blit_dst_x + regs.blit_dst_width,
-                                            regs.blit_dst_y + regs.blit_dst_height};
+    const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
+    const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
+                                          regs.blit_dst_x + regs.blit_dst_width,
+                                          regs.blit_dst_y + regs.blit_dst_height};

    if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) {
        UNIMPLEMENTED();
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -5,7 +5,7 @@
 #pragma once

 #include <array>
-#include "common/assert.h"
+#include <cstddef>
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -2,9 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include "common/assert.h"
 #include "common/logging/log.h"
-#include "core/core.h"
-#include "core/memory.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/memory_manager.h"

--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -5,8 +5,7 @@
 #pragma once

 #include <array>
-#include "common/assert.h"
-#include "common/bit_field.h"
+#include <cstddef>
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/gpu.h"
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -48,7 +48,7 @@ void KeplerMemory::ProcessData(u32 data) {
    // We have to invalidate the destination region to evict any outdated surfaces from the cache.
    // We do this before actually writing the new data because the destination address might contain
    // a dirty surface that will have to be written back to memory.
-    rasterizer.InvalidateRegion(*dest_address, sizeof(u32));
+    Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32));

    Memory::Write32(*dest_address, data);
    system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -5,6 +5,7 @@
 #pragma once

 #include <array>
+#include <cstddef>
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -107,21 +107,23 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
 void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
    auto debug_context = system.GetGPUDebugContext();

+    const u32 method = method_call.method;
+
    // It is an error to write to a register other than the current macro's ARG register before it
    // has finished execution.
    if (executing_macro != 0) {
-        ASSERT(method_call.method == executing_macro + 1);
+        ASSERT(method == executing_macro + 1);
    }

    // Methods after 0xE00 are special, they're actually triggers for some microcode that was
    // uploaded to the GPU during initialization.
-    if (method_call.method >= MacroRegistersStart) {
+    if (method >= MacroRegistersStart) {
        // We're trying to execute a macro
        if (executing_macro == 0) {
            // A macro call must begin by writing the macro method's register, not its argument.
-            ASSERT_MSG((method_call.method % 2) == 0,
+            ASSERT_MSG((method % 2) == 0,
                       "Can't start macro execution by writing to the ARGS register");
-            executing_macro = method_call.method;
+            executing_macro = method;
        }

        macro_params.push_back(method_call.argument);
@@ -133,66 +135,62 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
        return;
    }

-    ASSERT_MSG(method_call.method < Regs::NUM_REGS,
+    ASSERT_MSG(method < Regs::NUM_REGS,
               "Invalid Maxwell3D register, increase the size of the Regs structure");

    if (debug_context) {
        debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
    }

-    if (regs.reg_array[method_call.method] != method_call.argument) {
-        regs.reg_array[method_call.method] = method_call.argument;
+    if (regs.reg_array[method] != method_call.argument) {
+        regs.reg_array[method] = method_call.argument;
        // Color buffers
        constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
        constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
-        if (method_call.method >= first_rt_reg &&
-            method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
-            const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt;
-            dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index);
+        if (method >= first_rt_reg &&
+            method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
+            const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
+            dirty_flags.color_buffer.set(rt_index);
        }

        // Zeta buffer
        constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
-        if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) ||
-            method_call.method == MAXWELL3D_REG_INDEX(zeta_width) ||
-            method_call.method == MAXWELL3D_REG_INDEX(zeta_height) ||
-            (method_call.method >= MAXWELL3D_REG_INDEX(zeta) &&
-             method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
+        if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
+            method == MAXWELL3D_REG_INDEX(zeta_width) ||
+            method == MAXWELL3D_REG_INDEX(zeta_height) ||
+            (method >= MAXWELL3D_REG_INDEX(zeta) &&
+             method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
            dirty_flags.zeta_buffer = true;
        }

        // Shader
        constexpr u32 shader_registers_count =
            sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
-        if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
-            method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
+        if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
+            method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
            dirty_flags.shaders = true;
        }

        // Vertex format
-        if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
-            method_call.method <
-                MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
+        if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
+            method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
            dirty_flags.vertex_attrib_format = true;
        }

        // Vertex buffer
-        if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) &&
-            method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
-            dirty_flags.vertex_array |=
-                1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
-        } else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
-                   method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
-            dirty_flags.vertex_array |=
-                1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
-        } else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
-                   method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
-            dirty_flags.vertex_array |=
-                1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays));
+        if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
+            method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
+            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
+        } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
+                   method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
+            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
+        } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
+                   method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
+            dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
        }
    }

-    switch (method_call.method) {
+    switch (method) {
    case MAXWELL3D_REG_INDEX(macros.data): {
        ProcessMacroUpload(method_call.argument);
        break;
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -5,8 +5,10 @@
 #pragma once

 #include <array>
+#include <bitset>
 #include <unordered_map>
 #include <vector>
+
 #include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
@@ -503,7 +505,7 @@ public:
            f32 translate_z;
            INSERT_PADDING_WORDS(2);

-            MathUtil::Rectangle<s32> GetRect() const {
+            Common::Rectangle<s32> GetRect() const {
                return {
                    GetX(),               // left
                    GetY() + GetHeight(), // top
@@ -1094,19 +1096,18 @@ public:
    MemoryManager& memory_manager;

    struct DirtyFlags {
-        u8 color_buffer = 0xFF;
-        bool zeta_buffer = true;
-
-        bool shaders = true;
+        std::bitset<8> color_buffer{0xFF};        // bit N set => render target N is dirty
+        std::bitset<32> vertex_array{0xFFFFFFFF}; // bit N set => vertex array N is dirty

        bool vertex_attrib_format = true;
-        u32 vertex_array = 0xFFFFFFFF;
+        bool zeta_buffer = true;
+        bool shaders = true;

        void OnMemoryWrite() {
-            color_buffer = 0xFF;
            zeta_buffer = true;
            shaders = true;
-            vertex_array = 0xFFFFFFFF;
+            color_buffer.set(); // no-argument set() raises every bit (replaces "= 0xFF")
+            vertex_array.set(); // no-argument set() raises every bit (replaces "= 0xFFFFFFFF")
        }
    };

--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.

 #include "common/assert.h"
+#include "common/logging/log.h"
 #include "core/core.h"
 #include "core/memory.h"
 #include "video_core/engines/maxwell_3d.h"
@@ -91,12 +92,12 @@ void MaxwellDMA::HandleCopy() {
    const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
        // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
        // copying.
-        rasterizer.FlushRegion(*source_cpu, src_size);
+        Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size);

        // We have to invalidate the destination region to evict any outdated surfaces from the
        // cache. We do this before actually writing the new data because the destination address
        // might contain a dirty surface that will have to be written back to memory.
-        rasterizer.InvalidateRegion(*dest_cpu, dst_size);
+        Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size);
    };

    if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -5,6 +5,7 @@
 #pragma once

 #include <array>
+#include <cstddef>
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -6,7 +6,6 @@

 #include <bitset>
 #include <optional>
-#include <string>
 #include <tuple>
 #include <vector>

--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -12,7 +12,7 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/gpu.h"
-#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"

 namespace Tegra {

@@ -28,7 +28,8 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
    UNREACHABLE();
 }

-GPU::GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer) {
+GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
+    auto& rasterizer{renderer.Rasterizer()};
    memory_manager = std::make_unique<Tegra::MemoryManager>();
    dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
    maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -16,8 +16,8 @@ class System;
 }

 namespace VideoCore {
-class RasterizerInterface;
-}
+class RendererBase;
+} // namespace VideoCore

 namespace Tegra {

@@ -100,7 +100,7 @@ struct FramebufferConfig {

    using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
    TransformFlags transform_flags;
-    MathUtil::Rectangle<int> crop_rect;
+    Common::Rectangle<int> crop_rect;
 };

 namespace Engines {
@@ -119,10 +119,11 @@ enum class EngineID {
    MAXWELL_DMA_COPY_A = 0xB0B5,
 };

-class GPU final {
+class GPU {
 public:
-    explicit GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
-    ~GPU();
+    explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
+
+    virtual ~GPU();

    struct MethodCall {
        u32 method{};
@@ -200,8 +201,42 @@ public:
        };
    } regs{};

+    /// Push GPU command entries to be processed
+    virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
+
+    /// Swap buffers (render frame)
+    virtual void SwapBuffers(
+        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
+
+    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
+    virtual void FlushRegion(VAddr addr, u64 size) = 0;
+
+    /// Notify rasterizer that any caches of the specified region should be invalidated
+    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
+
+    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
+    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
+
 private:
+    void ProcessBindMethod(const MethodCall& method_call);
+    void ProcessSemaphoreTriggerMethod();
+    void ProcessSemaphoreRelease();
+    void ProcessSemaphoreAcquire();
+
+    /// Calls a GPU puller method.
+    void CallPullerMethod(const MethodCall& method_call);
+
+    /// Calls a GPU engine method.
+    void CallEngineMethod(const MethodCall& method_call);
+
+    /// Determines where the method should be executed.
+    bool ExecuteMethodOnEngine(const MethodCall& method_call);
+
+protected:
    std::unique_ptr<Tegra::DmaPusher> dma_pusher;
+    VideoCore::RendererBase& renderer;
+
+private:
    std::unique_ptr<Tegra::MemoryManager> memory_manager;

    /// Mapping of command subchannels to their bound engine ids.
@@ -217,18 +252,6 @@ private:
    std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
    /// Inline memory engine
    std::unique_ptr<Engines::KeplerMemory> kepler_memory;
-
-    void ProcessBindMethod(const MethodCall& method_call);
-    void ProcessSemaphoreTriggerMethod();
-    void ProcessSemaphoreRelease();
-    void ProcessSemaphoreAcquire();
-
-    // Calls a GPU puller method.
-    void CallPullerMethod(const MethodCall& method_call);
-    // Calls a GPU engine method.
-    void CallEngineMethod(const MethodCall& method_call);
-    // Determines where the method should be executed.
-    bool ExecuteMethodOnEngine(const MethodCall& method_call);
 };

 #define ASSERT_REG_POSITION(field_name, position)                                                  \
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -0,0 +1,50 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/gpu_asynch.h"
+#include "video_core/gpu_thread.h"
+#include "video_core/renderer_base.h"
+
+namespace VideoCommon {
+
+/// The Tegra::GPU base is constructed first and creates dma_pusher (see gpu.cpp), so it is
+/// already valid when it is dereferenced here for the thread manager.
+GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
+    : Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {}
+
+GPUAsynch::~GPUAsynch() = default;
+
+/// Hands a command list to the GPU thread manager.
+void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
+    // ExecuteCommand (gpu_thread.cpp) pushes a submitted list to the DmaPusher as-is, so filter
+    // empty lists here, exactly as the helper removed from nvhost_gpu.cpp used to do.
+    if (entries.empty()) {
+        return;
+    }
+
+    gpu_thread.SubmitList(std::move(entries));
+}
+
+/// Forwards the swap request to the GPU thread manager.
+void GPUAsynch::SwapBuffers(
+    std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
+    gpu_thread.SwapBuffers(std::move(framebuffer));
+}
+
+/// Forwards the flush request to the GPU thread manager.
+void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
+    gpu_thread.FlushRegion(addr, size);
+}
+
+/// Forwards the invalidate request to the GPU thread manager.
+void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
+    gpu_thread.InvalidateRegion(addr, size);
+}
+
+/// Forwards the flush-and-invalidate request to the GPU thread manager.
+void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+    gpu_thread.FlushAndInvalidateRegion(addr, size);
+}
+
+} // namespace VideoCommon
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -0,0 +1,37 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/gpu.h"
+#include "video_core/gpu_thread.h"
+
+namespace VideoCore {
+class RendererBase;
+} // namespace VideoCore
+
+namespace VideoCommon {
+
+/// Implementation of GPU interface that runs the GPU asynchronously.
+/// Every request received through the Tegra::GPU interface is forwarded to the
+/// GPUThread::ThreadManager member below instead of being serviced by this class directly.
+class GPUAsynch : public Tegra::GPU {
+public:
+    explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
+    ~GPUAsynch() override;
+
+    void PushGPUEntries(Tegra::CommandList&& entries) override;
+    void SwapBuffers(
+        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+
+private:
+    /// Receives every forwarded request. Held by value, so the complete type from
+    /// video_core/gpu_thread.h is required; a forward declaration would not be enough.
+    GPUThread::ThreadManager gpu_thread;
+};
+
+} // namespace VideoCommon
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -0,0 +1,49 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/gpu_synch.h"
+#include "video_core/renderer_base.h"
+
+namespace VideoCommon {
+
+/// Constructs the synchronous GPU; all state lives in the Tegra::GPU base.
+GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
+    : Tegra::GPU(system, renderer) {}
+
+GPUSynch::~GPUSynch() = default;
+
+/// Pushes a command list to the DMA pusher and calls DispatchCalls() inline.
+void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
+    // The file-local helper this method replaces (removed from nvhost_gpu.cpp) ignored empty
+    // lists; keep doing so rather than queueing a list that has no header to read.
+    if (entries.empty()) {
+        return;
+    }
+
+    dma_pusher->Push(std::move(entries));
+    dma_pusher->DispatchCalls();
+}
+
+/// Swaps buffers by calling the renderer directly.
+void GPUSynch::SwapBuffers(
+    std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
+    renderer.SwapBuffers(std::move(framebuffer));
+}
+
+/// Flushes the region by calling the renderer's rasterizer directly.
+void GPUSynch::FlushRegion(VAddr addr, u64 size) {
+    renderer.Rasterizer().FlushRegion(addr, size);
+}
+
+/// Invalidates the region by calling the renderer's rasterizer directly.
+void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
+    renderer.Rasterizer().InvalidateRegion(addr, size);
+}
+
+/// Flushes and invalidates the region by calling the renderer's rasterizer directly.
+void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+    renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
+}
+
+} // namespace VideoCommon
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -0,0 +1,29 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/gpu.h"
+
+namespace VideoCore {
+class RendererBase;
+} // namespace VideoCore
+
+namespace VideoCommon {
+
+/// Tegra::GPU implementation that runs the GPU synchronously; declares no data members of its own
+class GPUSynch : public Tegra::GPU {
+public:
+    explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
+    ~GPUSynch() override;
+
+    void PushGPUEntries(Tegra::CommandList&& entries) override;
+    void SwapBuffers(
+        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+};
+
+} // namespace VideoCommon
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -0,0 +1,152 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/microprofile.h"
+#include "core/frontend/scope_acquire_window_context.h"
+#include "core/settings.h"
+#include "video_core/dma_pusher.h"
+#include "video_core/gpu.h"
+#include "video_core/gpu_thread.h"
+#include "video_core/renderer_base.h"
+
+namespace VideoCommon::GPUThread {
+
+/// Runs one GPU thread command, dispatching on whichever alternative the variant currently holds
+static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer,
+                           Tegra::DmaPusher& dma_pusher) {
+    if (auto* const list = std::get_if<SubmitListCommand>(command)) {
+        dma_pusher.Push(std::move(list->entries));
+        dma_pusher.DispatchCalls();
+    } else if (auto* const swap = std::get_if<SwapBuffersCommand>(command)) {
+        renderer.SwapBuffers(swap->framebuffer);
+    } else if (auto* const flush = std::get_if<FlushRegionCommand>(command)) {
+        renderer.Rasterizer().FlushRegion(flush->addr, flush->size);
+    } else if (auto* const invalidate = std::get_if<InvalidateRegionCommand>(command)) {
+        renderer.Rasterizer().InvalidateRegion(invalidate->addr, invalidate->size);
+    } else if (auto* const both = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
+        renderer.Rasterizer().FlushAndInvalidateRegion(both->addr, both->size);
+    } else {
+        UNREACHABLE();
+    }
+}
+
+/// GPU thread entry point: sleeps until woken, then drains queued commands until is_running clears
+static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
+                      SynchState& state) {
+
+    MicroProfileOnThreadCreate("GpuThread");
+
+    auto WaitForWakeup = [&]() { // blocks until is_idle is false or is_running is false
+        std::unique_lock<std::mutex> lock{state.signal_mutex};
+        state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; });
+    };
+
+    // Wait for first GPU command before acquiring the window context
+    WaitForWakeup();
+
+    // If emulation was stopped during disk shader loading, abort before trying to acquire context
+    if (!state.is_running) {
+        return;
+    }
+
+    Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
+
+    while (state.is_running) {
+        if (!state.is_running) { // NOTE(review): repeats the loop condition's check
+            return;
+        }
+
+        {
+            // Thread has been woken up, so make the previous write queue the next read queue
+            std::lock_guard<std::mutex> lock{state.signal_mutex};
+            std::swap(state.push_queue, state.pop_queue);
+        }
+
+        // Execute all of the GPU commands (pop_queue is read here without holding signal_mutex)
+        while (!state.pop_queue->empty()) {
+            ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher);
+            state.pop_queue->pop();
+        }
+
+        state.UpdateIdleState(); // NOTE(review): reads both queues while holding only idle_mutex
+
+        // Signal that the GPU thread has finished processing commands
+        if (state.is_idle) {
+            state.idle_condition.notify_one();
+        }
+
+        // Wait for CPU thread to send more GPU commands
+        WaitForWakeup();
+    }
+}
+
+ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
+    : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
+                                                         std::ref(dma_pusher), std::ref(state)},
+      thread_id{thread.get_id()} {} // RunThread starts here; its id is cached for IsGpuThread()
+
+ThreadManager::~ThreadManager() {
+    {
+        // Clear is_running while holding signal_mutex, the mutex the GPU thread's wait uses
+        std::lock_guard<std::mutex> lock{state.signal_mutex};
+        state.is_running = false;
+    }
+
+    state.signal_condition.notify_one(); // wake the GPU thread so its wait sees !is_running
+    thread.join();                       // block until RunThread has returned
+}
+
+void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
+    // An empty list carries no work; anything else is queued without waiting for idle
+    if (!entries.empty()) {
+        PushCommand(SubmitListCommand(std::move(entries)), /*wait_for_idle=*/false,
+                    /*allow_on_cpu=*/false);
+    }
+}
+
+void ThreadManager::SwapBuffers(
+    std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
+    PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false); // wait_for_idle = true
+}
+
+void ThreadManager::FlushRegion(VAddr addr, u64 size) {
+    // Wait for idle only when accurate GPU emulation is enabled; never run on the calling thread
+    PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false);
+}
+
+void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
+    PushCommand(InvalidateRegionCommand(addr, size), true, true); // wait_for_idle, allow_on_cpu
+}
+
+void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+    InvalidateRegion(addr, size); // NOTE(review): invalidates only, no flush is queued - confirm
+}
+
+void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {
+    {
+        std::lock_guard<std::mutex> lock{state.signal_mutex};
+
+        if ((allow_on_cpu && state.is_idle) || IsGpuThread()) {
+            // Run inline: we are the GPU thread, or it is idle and allow_on_cpu is set
+            ExecuteCommand(&command_data, renderer, dma_pusher);
+            return;
+        }
+
+        // Queue first so the refreshed is_idle is false and the GPU thread's wait wakes up
+        state.push_queue->emplace(std::move(command_data)); // moved, not copied, into the queue
+        state.UpdateIdleState();
+    }
+
+    // Signal the GPU thread that commands are pending
+    state.signal_condition.notify_one();
+
+    if (wait_for_idle) {
+        // Block the caller until the GPU thread reports idle (all queued commands executed)
+        std::unique_lock<std::mutex> lock{state.idle_mutex};
+        state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); });
+    }
+}
+
+} // namespace VideoCommon::GPUThread
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -0,0 +1,133 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <atomic>
+#include <condition_variable>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <thread>
+#include <variant>
+
+namespace Tegra {
+struct FramebufferConfig;
+class DmaPusher;
+} // namespace Tegra
+
+namespace VideoCore {
+class RendererBase;
+} // namespace VideoCore
+
+namespace VideoCommon::GPUThread {
+
+/// Command carrying a command list for the GPU thread to process; takes ownership of the list
+struct SubmitListCommand final {
+    explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
+
+    Tegra::CommandList entries; // NOTE(review): CommandList is not declared in this header
+};
+
+/// Command to signal to the GPU thread that a swap buffers is pending; holds the config by value
+struct SwapBuffersCommand final {
+    explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
+        : framebuffer{std::move(framebuffer)} {}
+
+    std::optional<const Tegra::FramebufferConfig> framebuffer; // empty when no config was given
+};
+
+/// Command asking the GPU thread to flush the region described by addr and size
+struct FlushRegionCommand final {
+    explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+
+    const VAddr addr; // passed as the first argument of the rasterizer's FlushRegion
+    const u64 size;   // passed as the second argument
+};
+
+/// Command asking the GPU thread to invalidate the region described by addr and size
+struct InvalidateRegionCommand final {
+    explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+
+    const VAddr addr; // passed as the first argument of the rasterizer's InvalidateRegion
+    const u64 size;   // passed as the second argument
+};
+
+/// Command asking the GPU thread to flush and invalidate the region described by addr and size
+struct FlushAndInvalidateRegionCommand final {
+    explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
+        : addr{addr}, size{size} {}
+
+    const VAddr addr; // passed as the first argument of the rasterizer's FlushAndInvalidateRegion
+    const u64 size;   // passed as the second argument
+};
+
+using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
+                                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>; // one queued command
+
+/// State shared between ThreadManager and the GPU thread: run/idle flags, wakeups, command queues
+struct SynchState final {
+    std::atomic<bool> is_running{true}; // cleared by ~ThreadManager to stop the GPU thread
+    std::atomic<bool> is_idle{true};    // recomputed by UpdateIdleState()
+    std::condition_variable signal_condition; // wakes the GPU thread
+    std::mutex signal_mutex;                  // held for push_queue insertion and the queue swap
+    std::condition_variable idle_condition;   // wakes callers waiting for the GPU to go idle
+    std::mutex idle_mutex;                    // held while is_idle is recomputed or waited on
+
+    // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
+    // one for reading from (pop_queue). The GPU thread swaps them each time it wakes up, so
+    // producers append to one queue while the other is being drained.
+
+    using CommandQueue = std::queue<CommandData>; // NOTE(review): <queue> is not included above
+    std::array<CommandQueue, 2> command_queues;
+    CommandQueue* push_queue{&command_queues[0]};
+    CommandQueue* pop_queue{&command_queues[1]};
+
+    void UpdateIdleState() { // idle means both queues are empty
+        std::lock_guard<std::mutex> lock{idle_mutex};
+        is_idle = command_queues[0].empty() && command_queues[1].empty();
+    }
+};
+
+/// Owns the GPU thread: starts it on construction, feeds it commands, and joins it on destruction
+class ThreadManager final {
+public:
+    explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
+    ~ThreadManager();
+
+    /// Push GPU command entries to be processed (an empty list is ignored)
+    void SubmitList(Tegra::CommandList&& entries);
+
+    /// Swap buffers (render frame); waits until the GPU thread is idle before returning
+    void SwapBuffers(
+        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
+
+    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
+    void FlushRegion(VAddr addr, u64 size);
+
+    /// Notify rasterizer that any caches of the specified region should be invalidated
+    void InvalidateRegion(VAddr addr, u64 size);
+
+    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
+    void FlushAndInvalidateRegion(VAddr addr, u64 size);
+
+private:
+    /// Queues a command for the GPU thread, or runs it inline on the GPU thread / when allowed
+    void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
+
+    /// Returns true if this is called by the GPU thread
+    bool IsGpuThread() const {
+        return std::this_thread::get_id() == thread_id;
+    }
+
+private:
+    SynchState state; // declared before thread: the constructor passes std::ref(state) to it
+    VideoCore::RendererBase& renderer;
+    Tegra::DmaPusher& dma_pusher;
+    std::thread thread;        // runs RunThread; joined in the destructor
+    std::thread::id thread_id; // id of thread, compared against in IsGpuThread()
+};
+
+} // namespace VideoCommon::GPUThread
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -129,6 +129,15 @@ protected:
        return ++modified_ticks;
    }

+    /// Writes a dirty object back via Flush() and then marks it as no longer modified
+    void FlushObject(const T& object) {
+        // Objects that are not dirty are left untouched
+        if (object->IsDirty()) {
+            object->Flush();
+            object->MarkAsModified(false, *this);
+        }
+    }
+
 private:
    /// Returns a list of cached objects from the specified memory region, ordered by access time
    std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
@@ -154,15 +163,6 @@ private:
        return objects;
    }

-    /// Flushes the specified object, updating appropriate cache state as needed
-    void FlushObject(const T& object) {
-        if (!object->IsDirty()) {
-            return;
-        }
-        object->Flush();
-        object->MarkAsModified(false, *this);
-    }
-
    using ObjectSet = std::set<T>;
    using ObjectCache = std::unordered_map<VAddr, T>;
    using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -47,8 +47,8 @@ public:
    /// Attempt to use a faster method to perform a surface copy
    virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                                       const Tegra::Engines::Fermi2D::Regs::Surface& dst,
-                                       const MathUtil::Rectangle<u32>& src_rect,
-                                       const MathUtil::Rectangle<u32>& dst_rect) {
+                                       const Common::Rectangle<u32>& src_rect,
+                                       const Common::Rectangle<u32>& dst_rect) {
        return false;
    }

--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include "common/logging/log.h"
 #include "core/frontend/emu_window.h"
 #include "core/settings.h"
 #include "video_core/renderer_base.h"
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -102,8 +102,8 @@ struct FramebufferCacheKey {

 RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
                                   ScreenInfo& info)
-    : res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info},
-      buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
+    : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, emu_window{window},
+      screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
    // Create sampler objects
    for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
        texture_samplers[i].Create();
@@ -200,7 +200,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
    }

    // Rebinding the VAO invalidates the vertex buffer bindings.
-    gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
+    gpu.dirty_flags.vertex_array.set();

    state.draw.vertex_array = vao_entry.handle;
    return vao_entry.handle;
@@ -210,14 +210,14 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
    const auto& regs = gpu.regs;

-    if (!gpu.dirty_flags.vertex_array)
+    if (gpu.dirty_flags.vertex_array.none())
        return;

    MICROPROFILE_SCOPE(OpenGL_VB);

    // Upload all guest vertex arrays sequentially to our buffer
    for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
-        if (~gpu.dirty_flags.vertex_array & (1u << index))
+        if (!gpu.dirty_flags.vertex_array[index])
            continue;

        const auto& vertex_array = regs.vertex_array[index];
@@ -244,7 +244,7 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
        }
    }

-    gpu.dirty_flags.vertex_array = 0;
+    gpu.dirty_flags.vertex_array.reset();
 }

 DrawParameters RasterizerOpenGL::SetupDraw() {
@@ -488,13 +488,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
    OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
    std::optional<std::size_t> single_color_target) {
    MICROPROFILE_SCOPE(OpenGL_Framebuffer);
-    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
    const auto& regs = gpu.regs;

    const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
                                                 single_color_target};
-    if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 &&
-        !gpu.dirty_flags.zeta_buffer) {
+    if (fb_config_state == current_framebuffer_config_state &&
+        gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
        // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
        // single color targets). This is done because the guest registers may not change but the
        // host framebuffer may contain different attachments
@@ -721,10 +721,10 @@ void RasterizerOpenGL::DrawArrays() {
    // Add space for at least 18 constant buffers
    buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);

-    bool invalidate = buffer_cache.Map(buffer_size);
+    const bool invalidate = buffer_cache.Map(buffer_size);
    if (invalidate) {
        // As all cached buffers are invalidated, we need to recheck their state.
-        gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
+        gpu.dirty_flags.vertex_array.set();
    }

    const GLuint vao = SetupVertexFormat();
@@ -738,30 +738,18 @@ void RasterizerOpenGL::DrawArrays() {
    shader_program_manager->ApplyTo(state);
    state.Apply();

-    // Execute draw call
+    res_cache.SignalPreDrawCall();
    params.DispatchDraw();
-
-    // Disable scissor test
-    state.viewports[0].scissor.enabled = false;
+    res_cache.SignalPostDrawCall();

    accelerate_draw = AccelDraw::Disabled;
-
-    // Unbind textures for potential future use as framebuffer attachments
-    for (auto& texture_unit : state.texture_units) {
-        texture_unit.Unbind();
-    }
-    state.Apply();
 }

 void RasterizerOpenGL::FlushAll() {}

 void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-
-    if (Settings::values.use_accurate_gpu_emulation) {
-        // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
-        res_cache.FlushRegion(addr, size);
-    }
+    res_cache.FlushRegion(addr, size);
 }

 void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@@ -779,8 +767,8 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {

 bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                                             const Tegra::Engines::Fermi2D::Regs::Surface& dst,
-                                             const MathUtil::Rectangle<u32>& src_rect,
-                                             const MathUtil::Rectangle<u32>& dst_rect) {
+                                             const Common::Rectangle<u32>& src_rect,
+                                             const Common::Rectangle<u32>& dst_rect) {
    MICROPROFILE_SCOPE(OpenGL_Blits);
    res_cache.FermiCopySurface(src, dst, src_rect, dst_rect);
    return true;
@@ -814,104 +802,87 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,

 void RasterizerOpenGL::SamplerInfo::Create() {
    sampler.Create();
-    mag_filter = min_filter = Tegra::Texture::TextureFilter::Linear;
-    wrap_u = wrap_v = wrap_p = Tegra::Texture::WrapMode::Wrap;
-    uses_depth_compare = false;
+    mag_filter = Tegra::Texture::TextureFilter::Linear;
+    min_filter = Tegra::Texture::TextureFilter::Linear;
+    wrap_u = Tegra::Texture::WrapMode::Wrap;
+    wrap_v = Tegra::Texture::WrapMode::Wrap;
+    wrap_p = Tegra::Texture::WrapMode::Wrap;
+    use_depth_compare = false;
    depth_compare_func = Tegra::Texture::DepthCompareFunc::Never;

-    // default is GL_LINEAR_MIPMAP_LINEAR
+    // OpenGL's default is GL_LINEAR_MIPMAP_LINEAR
    glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-    // Other attributes have correct defaults
    glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER);
+
+    // Other attributes have correct defaults
 }

 void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) {
-    const GLuint s = sampler.handle;
+    const GLuint sampler_id = sampler.handle;
    if (mag_filter != config.mag_filter) {
        mag_filter = config.mag_filter;
        glSamplerParameteri(
-            s, GL_TEXTURE_MAG_FILTER,
+            sampler_id, GL_TEXTURE_MAG_FILTER,
            MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None));
    }
-    if (min_filter != config.min_filter || mip_filter != config.mip_filter) {
+    if (min_filter != config.min_filter || mipmap_filter != config.mipmap_filter) {
        min_filter = config.min_filter;
-        mip_filter = config.mip_filter;
-        glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER,
-                            MaxwellToGL::TextureFilterMode(min_filter, mip_filter));
+        mipmap_filter = config.mipmap_filter;
+        glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
+                            MaxwellToGL::TextureFilterMode(min_filter, mipmap_filter));
    }

    if (wrap_u != config.wrap_u) {
        wrap_u = config.wrap_u;
-        glSamplerParameteri(s, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u));
+        glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u));
    }
    if (wrap_v != config.wrap_v) {
        wrap_v = config.wrap_v;
-        glSamplerParameteri(s, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v));
+        glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v));
    }
    if (wrap_p != config.wrap_p) {
        wrap_p = config.wrap_p;
-        glSamplerParameteri(s, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p));
+        glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p));
    }

-    if (uses_depth_compare != (config.depth_compare_enabled == 1)) {
-        uses_depth_compare = (config.depth_compare_enabled == 1);
-        if (uses_depth_compare) {
-            glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE);
-        } else {
-            glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_NONE);
-        }
+    if (const bool enabled = config.depth_compare_enabled == 1; use_depth_compare != enabled) {
+        use_depth_compare = enabled;
+        glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
+                            use_depth_compare ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
    }

    if (depth_compare_func != config.depth_compare_func) {
        depth_compare_func = config.depth_compare_func;
-        glSamplerParameteri(s, GL_TEXTURE_COMPARE_FUNC,
+        glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
                            MaxwellToGL::DepthCompareFunc(depth_compare_func));
    }

-    GLvec4 new_border_color;
-    if (config.srgb_conversion) {
-        new_border_color[0] = config.srgb_border_color_r / 255.0f;
-        new_border_color[1] = config.srgb_border_color_g / 255.0f;
-        new_border_color[2] = config.srgb_border_color_g / 255.0f;
-    } else {
-        new_border_color[0] = config.border_color_r;
-        new_border_color[1] = config.border_color_g;
-        new_border_color[2] = config.border_color_b;
-    }
-    new_border_color[3] = config.border_color_a;
-
-    if (border_color != new_border_color) {
+    if (const auto new_border_color = config.GetBorderColor(); border_color != new_border_color) {
        border_color = new_border_color;
-        glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, border_color.data());
+        glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, border_color.data());
    }

-    const float anisotropic_max = static_cast<float>(1 << config.max_anisotropy.Value());
-    if (anisotropic_max != max_anisotropic) {
-        max_anisotropic = anisotropic_max;
+    if (const float anisotropic = config.GetMaxAnisotropy(); max_anisotropic != anisotropic) {
+        max_anisotropic = anisotropic;
        if (GLAD_GL_ARB_texture_filter_anisotropic) {
-            glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic);
+            glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic);
        } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
-            glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic);
+            glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic);
        }
    }
-    const float lod_min = static_cast<float>(config.min_lod_clamp.Value()) / 256.0f;
-    if (lod_min != min_lod) {
-        min_lod = lod_min;
-        glSamplerParameterf(s, GL_TEXTURE_MIN_LOD, min_lod);
+
+    if (const float min = config.GetMinLod(); min_lod != min) {
+        min_lod = min;
+        glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, min_lod);
+    }
+    if (const float max = config.GetMaxLod(); max_lod != max) {
+        max_lod = max;
+        glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, max_lod);
    }

-    const float lod_max = static_cast<float>(config.max_lod_clamp.Value()) / 256.0f;
-    if (lod_max != max_lod) {
-        max_lod = lod_max;
-        glSamplerParameterf(s, GL_TEXTURE_MAX_LOD, max_lod);
-    }
-    const u32 bias = config.mip_lod_bias.Value();
-    // Sign extend the 13-bit value.
-    constexpr u32 mask = 1U << (13 - 1);
-    const float bias_lod = static_cast<s32>((bias ^ mask) - mask) / 256.f;
-    if (lod_bias != bias_lod) {
-        lod_bias = bias_lod;
-        glSamplerParameterf(s, GL_TEXTURE_LOD_BIAS, lod_bias);
+    if (const float bias = config.GetLodBias(); lod_bias != bias) {
+        lod_bias = bias;
+        glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, lod_bias);
    }
 }

@@ -1034,7 +1005,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
    for (std::size_t i = 0; i < viewport_count; i++) {
        auto& viewport = current_state.viewports[i];
        const auto& src = regs.viewports[i];
-        const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
+        const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
        viewport.x = viewport_rect.left;
        viewport.y = viewport_rect.bottom;
        viewport.width = viewport_rect.GetWidth();
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -62,8 +62,8 @@ public:
    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
    bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                               const Tegra::Engines::Fermi2D::Regs::Surface& dst,
-                               const MathUtil::Rectangle<u32>& src_rect,
-                               const MathUtil::Rectangle<u32>& dst_rect) override;
+                               const Common::Rectangle<u32>& src_rect,
+                               const Common::Rectangle<u32>& dst_rect) override;
    bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                           u32 pixel_stride) override;
    bool AccelerateDrawBatch(bool is_indexed) override;
@@ -94,11 +94,12 @@ private:
    private:
        Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest;
        Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest;
-        Tegra::Texture::TextureMipmapFilter mip_filter = Tegra::Texture::TextureMipmapFilter::None;
+        Tegra::Texture::TextureMipmapFilter mipmap_filter =
+            Tegra::Texture::TextureMipmapFilter::None;
        Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge;
        Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge;
        Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge;
-        bool uses_depth_compare = false;
+        bool use_depth_compare = false;
        Tegra::Texture::DepthCompareFunc depth_compare_func =
            Tegra::Texture::DepthCompareFunc::Always;
        GLvec4 border_color = {};
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.

 #include <algorithm>
+#include <optional>
 #include <glad/glad.h>

 #include "common/alignment.h"
@@ -399,7 +400,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
    return format;
 }

-MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
+Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
    u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
    if (IsPixelFormatASTC(pixel_format)) {
        // ASTC formats must stop at the ATSC block size boundary
@@ -549,6 +550,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
    // alternatives. This signals a bug on those functions.
    const auto width = static_cast<GLsizei>(params.MipWidth(0));
    const auto height = static_cast<GLsizei>(params.MipHeight(0));
+    memory_size = params.MemorySize();
+    reinterpreted = false;

    const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
    gl_internal_format = format_tuple.internal_format;
@@ -873,30 +876,31 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
    auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
    const auto& regs{gpu.regs};

-    if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) {
-        return last_color_buffers[index];
+    if (!gpu.dirty_flags.color_buffer[index]) {
+        return current_color_buffers[index];
    }
-    gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index));
+    gpu.dirty_flags.color_buffer.reset(index);

    ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);

    if (index >= regs.rt_control.count) {
-        return last_color_buffers[index] = {};
+        return current_color_buffers[index] = {};
    }

    if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
-        return last_color_buffers[index] = {};
+        return current_color_buffers[index] = {};
    }

    const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};

-    return last_color_buffers[index] = GetSurface(color_params, preserve_contents);
+    return current_color_buffers[index] = GetSurface(color_params, preserve_contents);
 }

 void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
    surface->LoadGLBuffer();
    surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
    surface->MarkAsModified(false, *this);
+    surface->MarkForReload(false);
 }

 Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
@@ -908,18 +912,23 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
    Surface surface{TryGet(params.addr)};
    if (surface) {
        if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
-            // Use the cached surface as-is
+            // Use the cached surface as-is unless it's not synced with memory
+            if (surface->MustReload())
+                LoadSurface(surface);
            return surface;
        } else if (preserve_contents) {
            // If surface parameters changed and we care about keeping the previous data, recreate
            // the surface from the old one
            Surface new_surface{RecreateSurface(surface, params)};
-            Unregister(surface);
+            UnregisterSurface(surface);
            Register(new_surface);
+            if (new_surface->IsUploaded()) {
+                RegisterReinterpretSurface(new_surface);
+            }
            return new_surface;
        } else {
            // Delete the old surface before creating a new one to prevent collisions.
-            Unregister(surface);
+            UnregisterSurface(surface);
        }
    }

@@ -973,8 +982,8 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
 }

 static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
-                        const MathUtil::Rectangle<u32>& src_rect,
-                        const MathUtil::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
+                        const Common::Rectangle<u32>& src_rect,
+                        const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
                        GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0,
                        std::size_t cubemap_face = 0) {

@@ -1104,7 +1113,7 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
 void RasterizerCacheOpenGL::FermiCopySurface(
    const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
    const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
-    const MathUtil::Rectangle<u32>& src_rect, const MathUtil::Rectangle<u32>& dst_rect) {
+    const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) {

    const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
    const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
@@ -1201,4 +1210,107 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params
    return {};
 }

+static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams& params,
+                                            u32 height) {
+    for (u32 i = 0; i < params.max_mip_level; i++) {
+        if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) {
+            return {i};
+        }
+    }
+    return {}; // no level matches both the byte size and the height
+}
+
+static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams& params, u32 mipmap) {
+    const std::size_t size = params.LayerMemorySize();
+    VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap);
+    for (u32 i = 0; i < params.depth; i++) {
+        if (start == addr) {
+            return {i};
+        }
+        start += size; // step by one layer's memory size
+    }
+    return {};
+}
+
+static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface,
+                                       const Surface blitted_surface) {
+    const auto& dst_params = blitted_surface->GetSurfaceParams();
+    const auto& src_params = render_surface->GetSurfaceParams();
+    const std::size_t src_memory_size = src_params.size_in_bytes;
+    const std::optional<u32> level =
+        TryFindBestMipMap(src_memory_size, dst_params, src_params.height);
+    if (level.has_value()) {
+        if (src_params.width == dst_params.MipWidthGobAligned(*level) &&
+            src_params.height == dst_params.MipHeight(*level) &&
+            src_params.block_height >= dst_params.MipBlockHeight(*level)) {
+            const std::optional<u32> slot =
+                TryFindBestLayer(render_surface->GetAddr(), dst_params, *level);
+            if (slot.has_value()) {
+                glCopyImageSubData(render_surface->Texture().handle,
+                                   SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
+                                   blitted_surface->Texture().handle,
+                                   SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot,
+                                   dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1);
+                blitted_surface->MarkAsModified(true, cache);
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
+    const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize();
+    const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize();
+    if (bound2 > bound1)
+        return true;
+    const auto& dst_params = blitted_surface->GetSurfaceParams();
+    const auto& src_params = render_surface->GetSurfaceParams();
+    return (dst_params.component_type != src_params.component_type);
+}
+
+static bool IsReinterpretInvalidSecond(const Surface render_surface,
+                                       const Surface blitted_surface) {
+    const auto& dst_params = blitted_surface->GetSurfaceParams();
+    const auto& src_params = render_surface->GetSurfaceParams();
+    return (dst_params.height > src_params.height && dst_params.width > src_params.width);
+}
+
+bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
+                                                      Surface intersect) {
+    if (IsReinterpretInvalid(triggering_surface, intersect)) {
+        UnregisterSurface(intersect);
+        return false;
+    }
+    if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
+        if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
+            UnregisterSurface(intersect);
+            return false;
+        }
+        FlushObject(intersect);
+        FlushObject(triggering_surface);
+        intersect->MarkForReload(true);
+    }
+    return true;
+}
+
+void RasterizerCacheOpenGL::SignalPreDrawCall() {
+    if (texception && GLAD_GL_ARB_texture_barrier) {
+        glTextureBarrier();
+    }
+    texception = false;
+}
+
+void RasterizerCacheOpenGL::SignalPostDrawCall() {
+    for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
+        if (current_color_buffers[i] != nullptr) {
+            Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr());
+            if (intersect != nullptr) {
+                PartialReinterpretSurface(current_color_buffers[i], intersect);
+                texception = true;
+            }
+        }
+    }
+}
+
 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -28,12 +28,13 @@ namespace OpenGL {

 class CachedSurface;
 using Surface = std::shared_ptr<CachedSurface>;
-using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
+using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;

 using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
 using SurfaceType = VideoCore::Surface::SurfaceType;
 using PixelFormat = VideoCore::Surface::PixelFormat;
 using ComponentType = VideoCore::Surface::ComponentType;
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;

 struct SurfaceParams {
    enum class SurfaceClass {
@@ -71,7 +72,7 @@ struct SurfaceParams {
    }

    /// Returns the rectangle corresponding to this surface
-    MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const;
+    Common::Rectangle<u32> GetRect(u32 mip_level = 0) const;

    /// Returns the total size of this surface in bytes, adjusted for compression
    std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
@@ -140,10 +141,18 @@ struct SurfaceParams {
        return offset;
    }

+    std::size_t GetMipmapSingleSize(u32 mip_level) const {
+        return InnerMipmapMemorySize(mip_level, false, is_layered);
+    }
+
    u32 MipWidth(u32 mip_level) const {
        return std::max(1U, width >> mip_level);
    }

+    u32 MipWidthGobAligned(u32 mip_level) const {
+        return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp());
+    }
+
    u32 MipHeight(u32 mip_level) const {
        return std::max(1U, height >> mip_level);
    }
@@ -346,6 +355,10 @@ public:
        return cached_size_in_bytes;
    }

+    std::size_t GetMemorySize() const {
+        return memory_size;
+    }
+
    void Flush() override {
        FlushGLBuffer();
    }
@@ -395,6 +408,26 @@ public:
                       Tegra::Texture::SwizzleSource swizzle_z,
                       Tegra::Texture::SwizzleSource swizzle_w);

+    void MarkReinterpreted() {
+        reinterpreted = true;
+    }
+
+    bool IsReinterpreted() const {
+        return reinterpreted;
+    }
+
+    void MarkForReload(bool reload) {
+        must_reload = reload;
+    }
+
+    bool MustReload() const {
+        return must_reload;
+    }
+
+    bool IsUploaded() const {
+        return params.identity == SurfaceParams::SurfaceClass::Uploaded;
+    }
+
 private:
    void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);

@@ -408,6 +441,9 @@ private:
    GLenum gl_internal_format{};
    std::size_t cached_size_in_bytes{};
    std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
+    std::size_t memory_size;
+    bool reinterpreted = false;
+    bool must_reload = false;
 };

 class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -430,8 +466,11 @@ public:
    /// Copies the contents of one surface to another
    void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
                          const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
-                          const MathUtil::Rectangle<u32>& src_rect,
-                          const MathUtil::Rectangle<u32>& dst_rect);
+                          const Common::Rectangle<u32>& src_rect,
+                          const Common::Rectangle<u32>& dst_rect);
+
+    void SignalPreDrawCall();
+    void SignalPostDrawCall();

 private:
    void LoadSurface(const Surface& surface);
@@ -449,6 +488,10 @@ private:
    /// Tries to get a reserved surface for the specified parameters
    Surface TryGetReservedSurface(const SurfaceParams& params);

+    // Partially reinterpret a surface based on a triggering_surface that collides with it.
+    // Returns true if the reinterpret was successful, false in case it was not.
+    bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);
+
    /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
    void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
    void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
@@ -465,12 +508,50 @@ private:
    OGLFramebuffer read_framebuffer;
    OGLFramebuffer draw_framebuffer;

+    bool texception = false;
+
    /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
    /// using the new format.
    OGLBuffer copy_pbo;

-    std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers;
+    std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
+    std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
    Surface last_depth_buffer;
+
+    using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>;
+    using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
+
+    static auto GetReinterpretInterval(const Surface& object) {
+        return SurfaceInterval::right_open(object->GetAddr() + 1,
+                                           object->GetAddr() + object->GetMemorySize() - 1);
+    }
+
+    // Reinterpreted surfaces are very fragile as the game may keep rendering into them.
+    SurfaceIntervalCache reinterpreted_surfaces;
+
+    void RegisterReinterpretSurface(Surface reinterpret_surface) {
+        auto interval = GetReinterpretInterval(reinterpret_surface);
+        reinterpreted_surfaces.insert({interval, reinterpret_surface});
+        reinterpret_surface->MarkReinterpreted();
+    }
+
+    Surface CollideOnReinterpretedSurface(VAddr addr) const {
+        const SurfaceInterval interval{addr};
+        for (auto& pair :
+             boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
+            return pair.second;
+        }
+        return nullptr;
+    }
+
+    /// Unregisters an object from the cache
+    void UnregisterSurface(const Surface& object) {
+        if (object->IsReinterpreted()) {
+            auto interval = GetReinterpretInterval(object);
+            reinterpreted_surfaces.erase(interval);
+        }
+        Unregister(object);
+    }
 };

 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -2,8 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#pragma once
-
 #include <cstring>
 #include <fmt/format.h>
 #include <lz4.h>
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -461,7 +461,7 @@ void OpenGLState::ApplyTextures() const {

    if (has_delta) {
        glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
-                       textures.data());
+                       textures.data() + first);
    }
 }

@@ -482,7 +482,7 @@ void OpenGLState::ApplySamplers() const {
    }
    if (has_delta) {
        glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
-                       samplers.data());
+                       samplers.data() + first);
    }
 }

--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -244,6 +244,21 @@ void RendererOpenGL::InitOpenGLObjects() {
    LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
 }

+void RendererOpenGL::AddTelemetryFields() {
+    const char* const gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
+    const char* const gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
+    const char* const gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
+
+    LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
+    LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
+    LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
+
+    auto& telemetry_session = system.TelemetrySession();
+    telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
+    telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
+    telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
+}
+
 void RendererOpenGL::CreateRasterizer() {
    if (rasterizer) {
        return;
@@ -257,6 +272,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
                                                 const Tegra::FramebufferConfig& framebuffer) {
    texture.width = framebuffer.width;
    texture.height = framebuffer.height;
+    texture.pixel_format = framebuffer.pixel_format;

    GLint internal_format;
    switch (framebuffer.pixel_format) {
@@ -465,17 +481,7 @@ bool RendererOpenGL::Init() {
        glDebugMessageCallback(DebugHandler, nullptr);
    }

-    const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
-    const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
-    const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
-
-    LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
-    LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
-    LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
-
-    Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
-    Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
-    Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
+    AddTelemetryFields();

    if (!GLAD_GL_VERSION_4_3) {
        return false;
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -39,7 +39,7 @@ struct TextureInfo {
 /// Structure used for storing information about the display target for the Switch screen
 struct ScreenInfo {
    GLuint display_texture;
-    const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
+    const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
    TextureInfo texture;
 };

@@ -60,6 +60,7 @@ public:

 private:
    void InitOpenGLObjects();
+    void AddTelemetryFields();
    void CreateRasterizer();

    void ConfigureFramebufferTexture(TextureInfo& texture,
@@ -102,7 +103,7 @@ private:

    /// Used for transforming the framebuffer orientation
    Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
-    MathUtil::Rectangle<int> framebuffer_crop_rect;
+    Common::Rectangle<int> framebuffer_crop_rect;
 };

 } // namespace OpenGL
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -0,0 +1,483 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/surface.h"
+
+namespace Vulkan::MaxwellToVK {
+
+namespace Sampler {
+
+vk::Filter Filter(Tegra::Texture::TextureFilter filter) {
+    switch (filter) {
+    case Tegra::Texture::TextureFilter::Linear:
+        return vk::Filter::eLinear;
+    case Tegra::Texture::TextureFilter::Nearest:
+        return vk::Filter::eNearest;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter));
+    return {};
+}
+
+vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
+    switch (mipmap_filter) {
+    case Tegra::Texture::TextureMipmapFilter::None:
+        // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping
+        // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to
+        // use an image view with a single mipmap level to emulate this.
+        return vk::SamplerMipmapMode::eLinear;
+    case Tegra::Texture::TextureMipmapFilter::Linear:
+        return vk::SamplerMipmapMode::eLinear;
+    case Tegra::Texture::TextureMipmapFilter::Nearest:
+        return vk::SamplerMipmapMode::eNearest;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
+    return {};
+}
+
+vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) {
+    switch (wrap_mode) {
+    case Tegra::Texture::WrapMode::Wrap:
+        return vk::SamplerAddressMode::eRepeat;
+    case Tegra::Texture::WrapMode::Mirror:
+        return vk::SamplerAddressMode::eMirroredRepeat;
+    case Tegra::Texture::WrapMode::ClampToEdge:
+        return vk::SamplerAddressMode::eClampToEdge;
+    case Tegra::Texture::WrapMode::Border:
+        return vk::SamplerAddressMode::eClampToBorder;
+    case Tegra::Texture::WrapMode::ClampOGL:
+        // TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
+        // eClampToBorder to get the border color of the texture, and then sample the edge to
+        // manually mix them. However the shader part of this is not yet implemented.
+        return vk::SamplerAddressMode::eClampToBorder;
+    case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
+        return vk::SamplerAddressMode::eMirrorClampToEdge;
+    case Tegra::Texture::WrapMode::MirrorOnceBorder:
+        UNIMPLEMENTED();
+        return vk::SamplerAddressMode::eMirrorClampToEdge;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
+    return {};
+}
+
+vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
+    switch (depth_compare_func) {
+    case Tegra::Texture::DepthCompareFunc::Never:
+        return vk::CompareOp::eNever;
+    case Tegra::Texture::DepthCompareFunc::Less:
+        return vk::CompareOp::eLess;
+    case Tegra::Texture::DepthCompareFunc::LessEqual:
+        return vk::CompareOp::eLessOrEqual;
+    case Tegra::Texture::DepthCompareFunc::Equal:
+        return vk::CompareOp::eEqual;
+    case Tegra::Texture::DepthCompareFunc::NotEqual:
+        return vk::CompareOp::eNotEqual;
+    case Tegra::Texture::DepthCompareFunc::Greater:
+        return vk::CompareOp::eGreater;
+    case Tegra::Texture::DepthCompareFunc::GreaterEqual:
+        return vk::CompareOp::eGreaterOrEqual;
+    case Tegra::Texture::DepthCompareFunc::Always:
+        return vk::CompareOp::eAlways;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}",
+                      static_cast<u32>(depth_compare_func));
+    return {};
+}
+
+} // namespace Sampler
+
+struct FormatTuple {
+    vk::Format format;            ///< Vulkan format
+    ComponentType component_type; ///< Abstracted component type
+    bool attachable;              ///< True when this format can be used as an attachment
+};
+
+static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
+    {vk::Format::eA8B8G8R8UnormPack32, ComponentType::UNorm, true},    // ABGR8U
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ABGR8S
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ABGR8UI
+    {vk::Format::eB5G6R5UnormPack16, ComponentType::UNorm, false},     // B5G6R5U
+    {vk::Format::eA2B10G10R10UnormPack32, ComponentType::UNorm, true}, // A2B10G10R10U
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // A1B5G5R5U
+    {vk::Format::eR8Unorm, ComponentType::UNorm, true},                // R8U
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R8UI
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGBA16F
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGBA16U
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGBA16UI
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R11FG11FB10F
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGBA32UI
+    {vk::Format::eBc1RgbaUnormBlock, ComponentType::UNorm, false},     // DXT1
+    {vk::Format::eBc2UnormBlock, ComponentType::UNorm, false},         // DXT23
+    {vk::Format::eBc3UnormBlock, ComponentType::UNorm, false},         // DXT45
+    {vk::Format::eBc4UnormBlock, ComponentType::UNorm, false},         // DXN1
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // DXN2UNORM
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // DXN2SNORM
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // BC7U
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // BC6H_UF16
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // BC6H_SF16
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ASTC_2D_4X4
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // BGRA8
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGBA32F
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG32F
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R32F
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R16F
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R16U
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R16S
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R16UI
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R16I
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG16
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG16F
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG16UI
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG16I
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG16S
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGB32F
+    {vk::Format::eA8B8G8R8SrgbPack32, ComponentType::UNorm, true},     // RGBA8_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG8U
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG8S
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG32UI
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R32UI
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ASTC_2D_8X8
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ASTC_2D_8X5
+    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ASTC_2D_5X4
+
+    // Compressed sRGB formats
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT1_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT23_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT45_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5_SRGB
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8
+    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8_SRGB
+
+    // Depth formats
+    {vk::Format::eD32Sfloat, ComponentType::Float, true}, // Z32F
+    {vk::Format::eD16Unorm, ComponentType::UNorm, true},  // Z16
+
+    // DepthStencil formats
+    {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // Z24S8
+    {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // S8Z24 (emulated)
+    {vk::Format::eUndefined, ComponentType::Invalid, false},   // Z32FS8
+}};
+
+static constexpr bool IsZetaFormat(PixelFormat pixel_format) {
+    return pixel_format >= PixelFormat::MaxColorFormat &&
+           pixel_format < PixelFormat::MaxDepthStencilFormat;
+}
+
+std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
+                                          PixelFormat pixel_format, ComponentType component_type) {
+    ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
+
+    const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)];
+    UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined,
+                         "Unimplemented texture format with pixel format={} and component type={}",
+                         static_cast<u32>(pixel_format), static_cast<u32>(component_type));
+    ASSERT_MSG(component_type == tuple.component_type, "Component type mismatch");
+
+    auto usage = vk::FormatFeatureFlagBits::eSampledImage |
+                 vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc;
+    if (tuple.attachable) {
+        usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment
+                                            : vk::FormatFeatureFlagBits::eColorAttachment;
+    }
+    return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable};
+}
+
+vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage) {
+    switch (stage) {
+    case Maxwell::ShaderStage::Vertex:
+        return vk::ShaderStageFlagBits::eVertex;
+    case Maxwell::ShaderStage::TesselationControl:
+        return vk::ShaderStageFlagBits::eTessellationControl;
+    case Maxwell::ShaderStage::TesselationEval:
+        return vk::ShaderStageFlagBits::eTessellationEvaluation;
+    case Maxwell::ShaderStage::Geometry:
+        return vk::ShaderStageFlagBits::eGeometry;
+    case Maxwell::ShaderStage::Fragment:
+        return vk::ShaderStageFlagBits::eFragment;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage));
+    return {};
+}
+
+vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
+    switch (topology) {
+    case Maxwell::PrimitiveTopology::Points:
+        return vk::PrimitiveTopology::ePointList;
+    case Maxwell::PrimitiveTopology::Lines:
+        return vk::PrimitiveTopology::eLineList;
+    case Maxwell::PrimitiveTopology::LineStrip:
+        return vk::PrimitiveTopology::eLineStrip;
+    case Maxwell::PrimitiveTopology::Triangles:
+        return vk::PrimitiveTopology::eTriangleList;
+    case Maxwell::PrimitiveTopology::TriangleStrip:
+        return vk::PrimitiveTopology::eTriangleStrip;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
+    return {};
+}
+
+vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
+    switch (type) {
+    case Maxwell::VertexAttribute::Type::UnsignedNorm:
+        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+            return vk::Format::eR8G8B8A8Unorm;
+        default:
+            break;
+        }
+        break;
+    case Maxwell::VertexAttribute::Type::UnsignedInt:
+        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_32:
+            return vk::Format::eR32Uint;
+        default:
+            break;
+        }
+        break;
+    case Maxwell::VertexAttribute::Type::Float:
+        switch (size) {
+        case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
+            return vk::Format::eR32G32B32A32Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_32_32_32:
+            return vk::Format::eR32G32B32Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_32_32:
+            return vk::Format::eR32G32Sfloat;
+        case Maxwell::VertexAttribute::Size::Size_32:
+            return vk::Format::eR32Sfloat;
+        default:
+            break;
+        }
+        break;
+    // No formats are mapped for these types yet; they fall out to the unimplemented report.
+    case Maxwell::VertexAttribute::Type::SignedNorm:
+    case Maxwell::VertexAttribute::Type::SignedInt:
+    case Maxwell::VertexAttribute::Type::UnsignedScaled:
+    case Maxwell::VertexAttribute::Type::SignedScaled:
+        break;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", static_cast<u32>(type),
+                      static_cast<u32>(size));
+    return {};
+}
+
+vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
+    switch (comparison) {
+    case Maxwell::ComparisonOp::Never:
+    case Maxwell::ComparisonOp::NeverOld:
+        return vk::CompareOp::eNever;
+    case Maxwell::ComparisonOp::Less:
+    case Maxwell::ComparisonOp::LessOld:
+        return vk::CompareOp::eLess;
+    case Maxwell::ComparisonOp::Equal:
+    case Maxwell::ComparisonOp::EqualOld:
+        return vk::CompareOp::eEqual;
+    case Maxwell::ComparisonOp::LessEqual:
+    case Maxwell::ComparisonOp::LessEqualOld:
+        return vk::CompareOp::eLessOrEqual;
+    case Maxwell::ComparisonOp::Greater:
+    case Maxwell::ComparisonOp::GreaterOld:
+        return vk::CompareOp::eGreater;
+    case Maxwell::ComparisonOp::NotEqual:
+    case Maxwell::ComparisonOp::NotEqualOld:
+        return vk::CompareOp::eNotEqual;
+    case Maxwell::ComparisonOp::GreaterEqual:
+    case Maxwell::ComparisonOp::GreaterEqualOld:
+        return vk::CompareOp::eGreaterOrEqual;
+    case Maxwell::ComparisonOp::Always:
+    case Maxwell::ComparisonOp::AlwaysOld:
+        return vk::CompareOp::eAlways;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
+    return {};
+}
+
+/// Translates a Maxwell index format into a vk::IndexType.
+/// Unsigned byte indices are flagged through UNIMPLEMENTED_MSG and then reported as 16-bit.
+/// Any other unmatched value is flagged as well and yields a value-initialized vk::IndexType.
+vk::IndexType IndexFormat(Maxwell::IndexFormat index_format) {
+    using Format = Maxwell::IndexFormat;
+
+    if (index_format == Format::UnsignedByte) {
+        UNIMPLEMENTED_MSG("Vulkan does not support native u8 index format");
+        return vk::IndexType::eUint16;
+    }
+    if (index_format == Format::UnsignedShort) {
+        return vk::IndexType::eUint16;
+    }
+    if (index_format == Format::UnsignedInt) {
+        return vk::IndexType::eUint32;
+    }
+
+    UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format));
+    return {};
+}
+
+/// Translates a Maxwell stencil operation into its vk::StencilOp counterpart.
+/// Each operation and its `OGL`-suffixed twin map to the same Vulkan value. A value matching no
+/// case is reported through UNIMPLEMENTED_MSG and a value-initialized vk::StencilOp is returned.
+vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op) {
+    using Op = Maxwell::StencilOp;
+    using Result = vk::StencilOp;
+
+    switch (stencil_op) {
+    case Op::Keep:
+    case Op::KeepOGL:
+        return Result::eKeep;
+    case Op::Zero:
+    case Op::ZeroOGL:
+        return Result::eZero;
+    case Op::Replace:
+    case Op::ReplaceOGL:
+        return Result::eReplace;
+    case Op::Incr:
+    case Op::IncrOGL:
+        return Result::eIncrementAndClamp;
+    case Op::Decr:
+    case Op::DecrOGL:
+        return Result::eDecrementAndClamp;
+    case Op::Invert:
+    case Op::InvertOGL:
+        return Result::eInvert;
+    case Op::IncrWrap:
+    case Op::IncrWrapOGL:
+        return Result::eIncrementAndWrap;
+    case Op::DecrWrap:
+    case Op::DecrWrapOGL:
+        return Result::eDecrementAndWrap;
+    }
+
+    // Reached only when no enumerator above matched.
+    UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op));
+    return {};
+}
+
+/// Translates a Maxwell blend equation into its vk::BlendOp counterpart.
+/// Each equation and its `GL`-suffixed twin map to the same Vulkan value. A value matching no
+/// case is reported through UNIMPLEMENTED_MSG and a value-initialized vk::BlendOp is returned.
+vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation) {
+    using Equation = Maxwell::Blend::Equation;
+    using Result = vk::BlendOp;
+
+    switch (equation) {
+    case Equation::Add:
+    case Equation::AddGL:
+        return Result::eAdd;
+    case Equation::Subtract:
+    case Equation::SubtractGL:
+        return Result::eSubtract;
+    case Equation::ReverseSubtract:
+    case Equation::ReverseSubtractGL:
+        return Result::eReverseSubtract;
+    case Equation::Min:
+    case Equation::MinGL:
+        return Result::eMin;
+    case Equation::Max:
+    case Equation::MaxGL:
+        return Result::eMax;
+    }
+
+    // Reached only when no enumerator above matched.
+    UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
+    return {};
+}
+
+/// Translates a Maxwell blend factor into its vk::BlendFactor counterpart.
+/// Each factor and its `GL`-suffixed twin map to the same Vulkan value. A value matching no
+/// case is reported through UNIMPLEMENTED_MSG and a value-initialized vk::BlendFactor is returned.
+vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) {
+    using Factor = Maxwell::Blend::Factor;
+    using Result = vk::BlendFactor;
+
+    switch (factor) {
+    case Factor::Zero:
+    case Factor::ZeroGL:
+        return Result::eZero;
+    case Factor::One:
+    case Factor::OneGL:
+        return Result::eOne;
+    case Factor::SourceColor:
+    case Factor::SourceColorGL:
+        return Result::eSrcColor;
+    case Factor::OneMinusSourceColor:
+    case Factor::OneMinusSourceColorGL:
+        return Result::eOneMinusSrcColor;
+    case Factor::SourceAlpha:
+    case Factor::SourceAlphaGL:
+        return Result::eSrcAlpha;
+    case Factor::OneMinusSourceAlpha:
+    case Factor::OneMinusSourceAlphaGL:
+        return Result::eOneMinusSrcAlpha;
+    case Factor::DestAlpha:
+    case Factor::DestAlphaGL:
+        return Result::eDstAlpha;
+    case Factor::OneMinusDestAlpha:
+    case Factor::OneMinusDestAlphaGL:
+        return Result::eOneMinusDstAlpha;
+    case Factor::DestColor:
+    case Factor::DestColorGL:
+        return Result::eDstColor;
+    case Factor::OneMinusDestColor:
+    case Factor::OneMinusDestColorGL:
+        return Result::eOneMinusDstColor;
+    case Factor::SourceAlphaSaturate:
+    case Factor::SourceAlphaSaturateGL:
+        return Result::eSrcAlphaSaturate;
+    case Factor::Source1Color:
+    case Factor::Source1ColorGL:
+        return Result::eSrc1Color;
+    case Factor::OneMinusSource1Color:
+    case Factor::OneMinusSource1ColorGL:
+        return Result::eOneMinusSrc1Color;
+    case Factor::Source1Alpha:
+    case Factor::Source1AlphaGL:
+        return Result::eSrc1Alpha;
+    case Factor::OneMinusSource1Alpha:
+    case Factor::OneMinusSource1AlphaGL:
+        return Result::eOneMinusSrc1Alpha;
+    case Factor::ConstantColor:
+    case Factor::ConstantColorGL:
+        return Result::eConstantColor;
+    case Factor::OneMinusConstantColor:
+    case Factor::OneMinusConstantColorGL:
+        return Result::eOneMinusConstantColor;
+    case Factor::ConstantAlpha:
+    case Factor::ConstantAlphaGL:
+        return Result::eConstantAlpha;
+    case Factor::OneMinusConstantAlpha:
+    case Factor::OneMinusConstantAlphaGL:
+        return Result::eOneMinusConstantAlpha;
+    }
+
+    // Reached only when no enumerator above matched.
+    UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
+    return {};
+}
+
+/// Translates a Maxwell front-face winding into its vk::FrontFace counterpart.
+/// A value that is neither clockwise nor counter-clockwise is reported through
+/// UNIMPLEMENTED_MSG and a value-initialized vk::FrontFace is returned.
+vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) {
+    using Winding = Maxwell::Cull::FrontFace;
+
+    if (front_face == Winding::ClockWise) {
+        return vk::FrontFace::eClockwise;
+    }
+    if (front_face == Winding::CounterClockWise) {
+        return vk::FrontFace::eCounterClockwise;
+    }
+
+    UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face));
+    return {};
+}
+
+/// Translates a Maxwell cull-face selection into vk::CullModeFlags.
+/// A value matching none of front, back or front-and-back is reported through
+/// UNIMPLEMENTED_MSG and a value-initialized vk::CullModeFlags is returned.
+vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) {
+    using Face = Maxwell::Cull::CullFace;
+
+    if (cull_face == Face::Front) {
+        return vk::CullModeFlagBits::eFront;
+    }
+    if (cull_face == Face::Back) {
+        return vk::CullModeFlagBits::eBack;
+    }
+    if (cull_face == Face::FrontAndBack) {
+        return vk::CullModeFlagBits::eFrontAndBack;
+    }
+
+    UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
+    return {};
+}
+
+/// Translates a Tegra texture swizzle source into its vk::ComponentSwizzle counterpart.
+/// Both OneInt and OneFloat map to vk::ComponentSwizzle::eOne. A value matching no case is
+/// reported through UNIMPLEMENTED_MSG and a value-initialized vk::ComponentSwizzle is returned.
+vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
+    using Source = Tegra::Texture::SwizzleSource;
+    using Result = vk::ComponentSwizzle;
+
+    switch (swizzle) {
+    case Source::Zero:
+        return Result::eZero;
+    case Source::R:
+        return Result::eR;
+    case Source::G:
+        return Result::eG;
+    case Source::B:
+        return Result::eB;
+    case Source::A:
+        return Result::eA;
+    case Source::OneInt:
+    case Source::OneFloat:
+        return Result::eOne;
+    }
+
+    // Reached only when no enumerator above matched.
+    UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle));
+    return {};
+}
+
+} // namespace Vulkan::MaxwellToVK
--- a/Show More
+++ b/Show More