Compare commits

...

319 Commits

Author SHA1 Message Date
xperia64
ec74a4fd4a Fix getopt on systems where char is unsigned by default 2019-03-19 23:53:40 +01:00
bunnei
746167f11a Merge pull request #2258 from lioncash/am
service/am: Supply remaining missing IAudioController functions
2019-03-18 22:20:36 -04:00
bunnei
8dc2f01eae Merge pull request #2259 from lioncash/fsp
fsp_srv: Unstub SetCurrentProcess
2019-03-18 11:13:52 -04:00
bunnei
e05136f70b Merge pull request #2254 from lioncash/redundant
input_common/sdl_impl: Minor cleanup in SDLState constructor
2019-03-18 11:13:20 -04:00
Lioncash
9f092554c2 fsp_srv: Unstub SetCurrentProcess
This just acts as a basic setter for a given PID value and performs no
further checking, so we can just store the passed in value.
2019-03-18 10:38:01 -04:00
Lioncash
26b809549b service/am: Add basic implementation of ChangeMainAppletMasterVolume
All this does is supply a new volume level and a fade time in
nanoseconds for the volume transition to occur within.
2019-03-18 09:18:34 -04:00
Lioncash
c07ebeac19 service/am: Unstub SetTransparentVolumeRate()
Like the other volume setter, this mainly just sets a data member within
the service, nothing too special.
2019-03-18 09:18:34 -04:00
Lioncash
ecd3afdc8e service/am: Unstub SetExpectedMasterVolume()
This function passes in the desired main applet and library applet
volume levels. We can then just pass those values back within the
relevant volume getter functions, allowing us to unstub those as well.

The initial values for the library and main applet volumes differ. The
main applet volume is 0.25 by default, while the library applet volume
is initialized to 1.0 by default in the services themselves.
2019-03-18 09:18:34 -04:00
bunnei
30f228a8c9 Merge pull request #2238 from lioncash/thread
kernel/thread: Amend conditional test and assignment within UpdatePriority()
2019-03-17 22:27:33 -04:00
Mat M
c57d8eb66c Merge pull request #2257 from MerryMage/boost-1.66
CMakeLists: Raise minimum Boost requirement to 1.66.0
2019-03-17 20:21:11 -04:00
MerryMage
51f609fee7 CMakeLists: Raise minimum Boost requirement to 1.66.0
Required due to a bugfix in Boost for the changed template resolution rules in GCC 7.3.0 in C++17 mode
2019-03-17 23:04:03 +00:00
bunnei
57ca1e3e69 Merge pull request #2252 from bunnei/move-page-table
core: Move PageTable struct into Common.
2019-03-17 14:42:57 -04:00
Lioncash
114060fd87 input_common/sdl_impl: Make lambda capture more specific in SDLState constructor
We don't need to universally capture by reference. We specifically just
need to capture the this pointer.
2019-03-17 04:02:52 -04:00
Lioncash
d74aa13bd3 input_common/sdl_impl: Remove unnecessary std::chrono::duration construction
Specifying the time unit itself is sufficient here.
2019-03-17 04:02:52 -04:00
Lioncash
834d3fe336 input_common/sdl_impl: Remove unused variable in SDLState constructor 2019-03-17 04:02:48 -04:00
bunnei
41566c615b Merge pull request #2251 from bunnei/skip-zero-flush
gl_rasterizer: Skip zero addr/sized regions on flush/invalidate.
2019-03-17 01:40:32 -04:00
bunnei
9ad3b01d30 Merge pull request #2249 from lioncash/ipc
ipc_helpers: Allow pushing and popping floating-point values
2019-03-16 22:22:03 -04:00
bunnei
fd0533ef4c Merge pull request #2246 from lioncash/opus-fork
externals: Update opus to latest master
2019-03-16 22:12:59 -04:00
bunnei
ed7a1e1443 Merge pull request #2245 from lioncash/unused-def
kernel/thread: Actually remove the definition of ExitCurrentThread()
2019-03-16 22:12:40 -04:00
bunnei
93da8e0abf core: Move PageTable struct into Common. 2019-03-16 22:05:40 -04:00
bunnei
032e4c4ca3 gl_rasterizer: Skip zero addr/sized regions on flush/invalidate. 2019-03-16 22:03:19 -04:00
bunnei
2392e146b0 Merge pull request #2244 from bunnei/gpu-mem-refactor
video_core: Refactor to use MemoryManager interface for all memory access.
2019-03-16 21:59:45 -04:00
bunnei
bf41132aa9 Merge pull request #2243 from bunnei/mem-simplify-cache
memory: Simplify rasterizer cache operations.
2019-03-16 21:59:30 -04:00
bunnei
059465d496 Merge pull request #2129 from FernandoS27/cntpct
Correct CNTPCT from using CPU Cycles to using Clock Cycles
2019-03-16 21:58:59 -04:00
Lioncash
64444ff481 ipc_helpers: Allow pushing and popping floating-point values
Certain values that are passed through the IPC buffer are actually
floating point values, not solely integral values.
2019-03-16 14:05:03 -04:00
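A minimal sketch of the idea above, assuming a plain vector of 32-bit command-buffer words rather than yuzu's actual ipc_helpers types: a float travels through IPC as the raw bit pattern of a single word.

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Push a float into a stream of 32-bit IPC words by copying its bit pattern.
    void PushFloat(std::vector<std::uint32_t>& words, float value) {
        std::uint32_t raw = 0;
        std::memcpy(&raw, &value, sizeof(raw)); // well-defined type pun
        words.push_back(raw);
    }

    // Pop it back out by reversing the copy.
    float PopFloat(const std::vector<std::uint32_t>& words, std::size_t index) {
        float value = 0.0f;
        std::memcpy(&value, &words[index], sizeof(value));
        return value;
    }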
Lioncash
6abc56672c externals: Update opus to latest master
Prevents yuzu from getting 2000+ warnings in MSVC in a future change.
2019-03-16 04:10:41 -04:00
Lioncash
99f982dce2 kernel/thread: Actually remove the definition of ExitCurrentThread()
This was intended to be removed in
51d7f6bffc, but I guess I forgot to
actually save the file like a dingus.
2019-03-16 00:51:44 -04:00
bunnei
29c242721a Merge pull request #2241 from lioncash/compile-flags
CMakeLists: Remove now-unnecessary GCC special-casing
2019-03-16 00:43:29 -04:00
bunnei
bdf2da4ee8 Merge pull request #2242 from lioncash/thread-fn
kernel/thread: Remove WaitCurrentThread_Sleep() and ExitCurrentThread()
2019-03-16 00:43:09 -04:00
bunnei
10118c71e0 memory: Simplify rasterizer cache operations. 2019-03-16 00:41:08 -04:00
bunnei
574e89d924 video_core: Refactor to use MemoryManager interface for all memory access.
# Conflicts:
#	src/video_core/engines/kepler_memory.cpp
#	src/video_core/engines/maxwell_3d.cpp
#	src/video_core/morton.cpp
#	src/video_core/morton.h
#	src/video_core/renderer_opengl/gl_global_cache.cpp
#	src/video_core/renderer_opengl/gl_global_cache.h
#	src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
2019-03-16 00:38:48 -04:00
bunnei
47b622825c Merge pull request #2237 from bunnei/cache-host-addr
gpu: Use host address for caching instead of guest address.
2019-03-16 00:05:24 -04:00
Lioncash
51d7f6bffc kernel/thread: Move thread exiting logic from ExitCurrentThread to svcExitThread
Puts the operation on global state in the same places as the rest of the
svc calls.
2019-03-15 23:58:37 -04:00
Lioncash
c892cf01fa kernel/thread: Migrate WaitCurrentThread_Sleep into the Thread interface
Rather than make a global accessor for this sort of thing, we can make
it part of the thread interface itself. This allows getting rid of a
hidden global accessor in the kernel code.
2019-03-15 23:58:31 -04:00
Lioncash
db47d7e471 kernel/thread: Expand documentation of nominal_priority and current_priority
Aims to disambiguate a little why each priority instance exists.
While we're at it, also add an explanatory comment to UpdatePriority().
2019-03-15 23:02:14 -04:00
Lioncash
e0d1f11968 kernel/thread: Make bracing consistent within UpdatePriority() 2019-03-15 23:02:10 -04:00
Lioncash
39483b92b7 kernel/thread: Amend condition within UpdatePriority()
This condition was checking against the nominal thread priority, whereas
the kernel itself checks against the current priority instead. We were
also assigning the nominal priority, when we should be assigning
current_priority, which takes priority inheritance into account.

This can lead to the incorrect priority being assigned to a thread.

Given we recursively update the relevant threads, we don't need to go
through the whole mutex waiter list. This matches what the kernel does
as well (only accessing the first entry within the waiting list).
2019-03-15 23:01:43 -04:00
Lioncash
0b78cfcc53 kernel/thread: Maintain priority ordering of added mutex waiting threads
The kernel keeps the internal waiting list ordered by priority. This is
trivial to do with std::find_if followed by an insertion.
2019-03-15 23:01:39 -04:00
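A minimal sketch of that ordered insertion, assuming a simplified Thread type with a current_priority field (lower value meaning higher priority), not yuzu's actual class:

    #include <algorithm>
    #include <vector>

    struct Thread {
        int current_priority; // lower value = higher priority (assumed convention)
    };

    // Insert the new waiter in front of the first thread with a lower priority,
    // keeping the waiting list sorted by priority.
    void AddMutexWaiter(std::vector<Thread*>& waiters, Thread* thread) {
        const auto insertion_point =
            std::find_if(waiters.begin(), waiters.end(), [thread](const Thread* waiter) {
                return waiter->current_priority > thread->current_priority;
            });
        waiters.insert(insertion_point, thread);
    }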
bunnei
06ac6460d3 Merge pull request #2048 from FearlessTobi/port-3924
Port citra-emu/citra#3924: "citra_qt: Settings (configuration) rework"
2019-03-15 22:23:38 -04:00
Lioncash
e5b004e903 CMakeLists: Remove now-unnecessary GCC special-casing
This issue has since been fixed in newer versions of Boost, so we don't
need to worry about this anymore.
2019-03-15 20:49:58 -04:00
bunnei
2eaf6c41a4 gpu: Use host address for caching instead of guest address. 2019-03-14 22:34:42 -04:00
bunnei
84d3cdf7d7 Merge pull request #2233 from ReinUsesLisp/morton-cleanup
video_core/morton: Miscellaneous changes
2019-03-14 21:23:12 -04:00
bunnei
6788ebffc8 Merge pull request #2229 from ReinUsesLisp/vk-sampler-cache
vk_sampler_cache: Implement a sampler cache
2019-03-14 21:22:34 -04:00
bunnei
2d9546848e Merge pull request #2230 from lioncash/global
kernel/process: Remove use of global system accessors
2019-03-14 20:42:46 -04:00
bunnei
8bd17aa044 Merge pull request #2216 from ReinUsesLisp/rasterizer-system
gl_rasterizer: Use system instance passed from argument
2019-03-14 16:37:05 -04:00
bunnei
4e6c667586 Merge pull request #2227 from lioncash/override
renderer_opengl/gl_global_cache: Add missing override specifiers
2019-03-13 17:05:49 -04:00
ReinUsesLisp
ffe2e50458 video_core/morton: Use enum to describe MortonCopyPixels128 mode 2019-03-13 16:35:21 -03:00
ReinUsesLisp
6ed6129b4f video_core/morton: Remove unused parameter in MortonSwizzle 2019-03-13 16:35:10 -03:00
ReinUsesLisp
9030a8259f video_core/morton: Remove clang-format off when it's not needed 2019-03-13 16:16:45 -03:00
ReinUsesLisp
fdf76a25ab video_core/morton: Remove unused functions 2019-03-13 16:15:54 -03:00
bunnei
e7850a7f11 Merge pull request #2226 from lioncash/private
kernel/server_port: Make data members private
2019-03-13 14:44:21 -04:00
bunnei
c1ea6a39a0 Merge pull request #2223 from lioncash/error
core/hle/result: Tidy up the base error code result header.
2019-03-13 14:43:14 -04:00
bunnei
0a923b4ab3 Merge pull request #2187 from FearlessTobi/port-sdl-things
Port various Citra changes to input_common, including deadzone support
2019-03-13 11:46:57 -04:00
bunnei
e8a21f5276 Merge pull request #2166 from lioncash/vi-init-service
service/vi: Unstub GetDisplayService
2019-03-13 10:01:54 -04:00
bunnei
71c4e876ef Merge pull request #2231 from ReinUsesLisp/fixup-bias
video_core/texture: Fix up sampler lod bias
2019-03-13 09:58:58 -04:00
ReinUsesLisp
a63295a872 video_core/texture: Fix up sampler lod bias 2019-03-13 00:45:54 -03:00
Mat M
a3734d7e31 vk_sampler_cache: Use operator== instead of memcmp
Co-Authored-By: ReinUsesLisp <reinuseslisp@airmail.cc>
2019-03-12 21:05:36 -03:00
ReinUsesLisp
aa59d77c3b vk_sampler_cache: Implement a sampler cache 2019-03-12 20:20:57 -03:00
Lioncash
6eddb60db0 kernel/process: Remove use of global system accessors
Now that we pass in a reference to the system instance, we can utilize
it to eliminate the global accessors in Process-related code.
2019-03-12 19:03:28 -04:00
bunnei
3bfd199497 Merge pull request #2211 from lioncash/arbiter
kernel: Make the address arbiter instance per-process
2019-03-12 17:54:48 -04:00
bunnei
0f6dd7b64e Merge pull request #2222 from lioncash/cstr
service/service: Remove unnecessary calls to c_str()
2019-03-12 17:54:20 -04:00
ReinUsesLisp
8ebeb9ade2 video_core/texture: Add a raw representation of TSCEntry 2019-03-12 16:56:29 -03:00
bunnei
2ad44a453f Merge pull request #2215 from ReinUsesLisp/samplers
gl_rasterizer: Encapsulate sampler queries into methods
2019-03-12 13:10:53 -04:00
Lioncash
3350c0a779 renderer_opengl/gl_global_cache: Replace indexing for assignment with insert_or_assign
The previous code had some minor issues with it, really not a big deal,
but amending it is basically 'free', so I figured, "why not?".

With the standard container maps, when:

map[key] = thing;

is done, this can cause potentially undesirable behavior in certain
scenarios. In particular, if there's no value associated with the key,
then the map constructs a default initialized instance of the value
type.

In this case, since it's a std::shared_ptr (as a type alias) that is
the value type, this will construct a std::shared_ptr, and then
assign over it (with objects that are quite large, or that actively
heap allocate, this can be extremely undesirable).

We also make the function take the region by value, as we can avoid a
copy (and by extension with std::shared_ptr, a copy causes an atomic
reference count increment), in certain scenarios when ownership isn't a
concern (i.e. when ReserveGlobalRegion is called with an rvalue
reference, then no copy at all occurs). So, it's more-or-less a "free"
gain without many downsides.
2019-03-11 12:20:35 -04:00
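A small illustration of the difference described above, using a hypothetical Region alias and key type rather than yuzu's actual cache types:

    #include <map>
    #include <memory>
    #include <utility>

    struct GlobalRegion {}; // stand-in for a large, heap-allocating object
    using Region = std::shared_ptr<GlobalRegion>;

    std::map<int, Region> regions;

    // Taking 'region' by value lets callers move into it, avoiding a shared_ptr copy.
    void ReserveGlobalRegion(int key, Region region) {
        // regions[key] = std::move(region);              // default-constructs a Region
        //                                                // first, then assigns over it
        regions.insert_or_assign(key, std::move(region)); // stores it directly, with no
                                                          // throwaway default construction
    }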
Lioncash
1070c020db renderer_opengl/gl_global_cache: Append missing override specifiers
Two of the functions here are overridden functions, so we can append
these specifiers to make it explicit.
2019-03-11 12:02:30 -04:00
Lioncash
aa44eb639b kernel/server_port: Make data members private
With this, all kernel objects finally have all of their data members
behind an interface, making it nicer to reason about interactions with
other code (as external code no longer has the freedom to totally alter
internals and potentially mess up invariants).
2019-03-11 10:41:05 -04:00
ReinUsesLisp
a6c048920e gl_rasterizer: Use system instance passed from argument 2019-03-11 03:17:21 -03:00
Lioncash
0c28ab92e6 core/hle/result: Remove now-unnecessary manually defined copy assignment operator
Previously this was required, as BitField wasn't trivially copyable.
BitField has since been made trivially copyable, so now this isn't
required anymore.
2019-03-10 18:34:20 -04:00
Lioncash
3f602dde0f core/hle/result: Amend error in comment description for ResultCode
Gets rid of another holdover from Citra, and describes the OS on the
Switch instead.
2019-03-10 18:29:31 -04:00
Lioncash
f7ec0bcfc2 core/hle/result: Remove now-unused constructor for ResultCode
Now that the final stray ErrorDescription member was relocated, we can
finally remove it and its relevant constructor in the ResultCode union.
2019-03-10 18:26:12 -04:00
Lioncash
d870cc5ad7 core/hle/result: Relocate IPC error code to ipc_helpers
Relocates the error code to where it's most related, similar to how all
the other error codes are. Previously we were including a non-generic
error in the main result code header.
2019-03-10 18:23:42 -04:00
Lioncash
8603f0f9b1 service/service: Remove unnecessary calls to c_str()
These can just be passed regularly, now that we use fmt instead of our
old logging system.

While we're at it, make the parameters to MakeFunctionString
std::string_views.
2019-03-10 18:00:57 -04:00
bunnei
0aa824b12f Merge pull request #2207 from lioncash/hwopus
service/audio/hwopus: Move decoder state to its own class
2019-03-10 17:32:39 -04:00
bunnei
037d9bdde3 Merge pull request #2193 from lioncash/global
kernel/scheduler: Pass in system instance in constructor
2019-03-10 17:29:01 -04:00
bunnei
633ce92908 Merge pull request #2147 from ReinUsesLisp/texture-clean
shader_ir: Remove "extras" from the MetaTexture
2019-03-10 17:28:36 -04:00
bunnei
4a84921b31 Merge pull request #2143 from ReinUsesLisp/texview
gl_rasterizer_cache: Create texture views for array discrepancies
2019-03-10 17:27:49 -04:00
bunnei
add8b1df68 Merge pull request #2220 from lioncash/cubeb
audio_core/cubeb_sink: Convert _MSC_VER ifdefs to _WIN32
2019-03-10 17:26:20 -04:00
Mat M
0ea2771889 Merge pull request #2217 from ReinUsesLisp/rasterizer-logger
gl_rasterizer: Minor logger changes
2019-03-10 03:16:00 -04:00
Mat M
9ae680c639 Merge pull request #2219 from Hexagon12/log-settings
core/settings: Log more setting values
2019-03-10 03:15:01 -04:00
Mat M
46fdf8c819 Merge pull request #2218 from ReinUsesLisp/cmd-cast
yuzu_cmd/config: Silence implicit cast warning
2019-03-10 03:14:34 -04:00
Lioncash
4a4e87e971 audio_core/cubeb_sink: Convert _MSC_VER ifdefs to _WIN32
This behavior also needs to be visible for MinGW builds as well.
2019-03-09 18:06:23 -05:00
Hexagon12
e6f652ae12 clang fix 2019-03-09 16:42:56 +02:00
Hexagon12
6ce8de4b5f Log 2 new setting values 2019-03-09 14:58:15 +02:00
ReinUsesLisp
a0be7b3b92 gl_rasterizer: Encapsulate sampler queries into methods 2019-03-09 04:35:57 -03:00
ReinUsesLisp
45ef421b6b yuzu_cmd/config: Replace C casts with static_cast 2019-03-09 03:59:23 -03:00
ReinUsesLisp
fedef7bda3 yuzu_cmd/config: Silence implicit cast warning 2019-03-09 03:58:20 -03:00
ReinUsesLisp
6ee0ba64c8 gl_rasterizer: Minor logger changes 2019-03-09 03:34:49 -03:00
bunnei
9909d40530 Merge pull request #2210 from lioncash/optional
kernel/hle_ipc: Convert std::shared_ptr IPC header instances to std::optional
2019-03-08 16:35:57 -05:00
bunnei
160fc63c72 Merge pull request #2209 from lioncash/reorder
video_core/gpu_thread: Silence a -Wreorder warning
2019-03-08 12:04:26 -05:00
bunnei
78c803b4f3 Merge pull request #2208 from lioncash/gpu
video_core/gpu: Make GPU's destructor virtual
2019-03-08 12:03:58 -05:00
bunnei
1143923cdd Merge pull request #2191 from ReinUsesLisp/maxwell-to-vk
maxwell_to_vk: Initial implementation
2019-03-08 11:51:08 -05:00
bunnei
d10dffed44 Merge pull request #2212 from ReinUsesLisp/dma-push-fix
dma_pusher: Store command_list_header by copy
2019-03-08 11:48:32 -05:00
ReinUsesLisp
e7ac5a6adf dma_pusher: Store command_list_header by copy
Instead of holding a reference that will get invalidated by
dma_pushbuffer.pop(), hold it as a copy. This doesn't have any
performance cost since CommandListHeader is 8 bytes long.
2019-03-08 04:06:54 -03:00
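A short sketch of the bug class being avoided here, with a simplified CommandListHeader standing in for the real one:

    #include <cstdint>
    #include <queue>

    struct CommandListHeader {
        std::uint64_t raw; // 8 bytes, so copying costs about the same as taking a reference
    };

    void Step(std::queue<CommandListHeader>& dma_pushbuffer) {
        // const auto& header = dma_pushbuffer.front(); // reference dies once pop() runs
        const CommandListHeader header = dma_pushbuffer.front(); // the copy stays valid
        dma_pushbuffer.pop();
        // ... safe to keep using 'header' here ...
        (void)header;
    }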
Lioncash
fbb82e61e3 kernel/hle_ipc: Convert std::shared_ptr IPC header instances to std::optional
There's no real need to use a shared lifetime here, since we don't
actually expose them to anything else. This is also kind of an
unnecessary use of the heap given the objects themselves are so small;
small enough, in fact, that changing over to optionals actually reduces
the overall size of the HLERequestContext struct (818 bytes to 808
bytes).
2019-03-07 23:34:37 -05:00
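A rough sketch of the layout change, with a toy header type instead of the real IPC headers:

    #include <cstdint>
    #include <memory>
    #include <optional>

    struct ToyIpcHeader {
        std::uint32_t raw;
    };

    // shared_ptr forces a heap allocation and shared ownership that nothing else observes.
    struct ContextBefore {
        std::shared_ptr<ToyIpcHeader> header;
    };

    // optional keeps the header inline in the struct and still models "maybe absent".
    struct ContextAfter {
        std::optional<ToyIpcHeader> header;
    };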
Lioncash
69749a88cd travis: Bump macOS version to 10.14
For whatever bizarre reason, Apple only made a few of std::optional's
member functions available on newer SDK versions. Given we can't even
run yuzu on macOS, and we keep the builder around to ensure that it
always at least compiles on macOS, we can bump this up a version.
2019-03-07 23:34:37 -05:00
Lioncash
8e510d5afa kernel: Make the address arbiter instance per-process
Now that we have the address arbiter extracted to its own class, we can
fix an inaccuracy with the kernel. Said inaccuracy being that there
isn't only one address arbiter. Each process instance contains its own
AddressArbiter instance in the actual kernel.

This fixes that and gets rid of another long-standing issue that could
arise when attempting to create more than one process.
2019-03-07 23:27:51 -05:00
Lioncash
b7f331afa3 kernel/svc: Move address arbiter signaling behind a unified API function
Similar to how WaitForAddress was isolated to its own function, we can
also move the necessary conditional checking into the address arbiter
class itself, allowing us to hide the implementation details of it from
public use.
2019-03-07 23:27:47 -05:00
Lioncash
0209de123b kernel/svc: Move address arbiter waiting behind a unified API function
Rather than let the service call itself work out which function is the
proper one to call, we can make that a behavior of the arbiter itself,
so we don't need to directly expose those implementation details.
2019-03-07 23:27:20 -05:00
bunnei
d26ee6e01e Merge pull request #2195 from lioncash/shared-global
kernel/shared_memory: Get rid of the use of global accessor functions within Create()
2019-03-07 17:26:11 -05:00
Lioncash
e99a148628 common/bit_field: Make BitField trivially copyable
This makes the class much more flexible and doesn't make performing
copies with classes that contain a bitfield member a pain.

Given BitField instances are only intended to be used within unions, the
fact the full storage value would be copied isn't a big concern (only
sizeof(union_type) would be copied anyways).

While we're at it, provide defaulted move constructors for consistency.
2019-03-07 17:05:44 -05:00
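A toy illustration of the property in question; this is not yuzu's BitField, just a minimal wrapper showing why defaulted special members keep a union of such fields trivially copyable:

    #include <cstddef>
    #include <cstdint>
    #include <type_traits>

    template <std::size_t Position, std::size_t Bits, typename T>
    struct ToyBitField {
        T storage; // all special member functions are implicitly defaulted

        T Value() const {
            return static_cast<T>((storage >> Position) & ((T{1} << Bits) - 1));
        }
    };

    union Flags {
        std::uint32_t raw;
        ToyBitField<0, 4, std::uint32_t> low_nibble;
        ToyBitField<4, 4, std::uint32_t> high_nibble;
    };

    // Copying a Flags copies the full 32-bit storage, i.e. exactly sizeof(union_type).
    static_assert(std::is_trivially_copyable_v<Flags>);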
Lioncash
c2d4c8b95e video_core/gpu_thread: Remove unimplemented WaitForIdle function prototype
This function didn't have a definition, so we can remove it to prevent
accidentally attempting to use it.
2019-03-07 16:08:52 -05:00
Lioncash
48a461a629 video_core/gpu_thread: Amend constructor initializer list order
Moves the data members to satisfy the order they're declared as in the
constructor initializer list.

Silences a -Wreorder warning.
2019-03-07 16:05:49 -05:00
Lioncash
24e2e601d5 video_core/gpu: Make GPU's destructor virtual
Because of the recent separation of GPU functionality into sync/async
variants, we need to mark the destructor virtual to provide proper
destruction behavior, given we use the base class within the System
class.

Prior to this, destroying a derived instance through the base-class
pointer was undefined behavior, so the derived destructor was not
guaranteed to execute.
2019-03-07 15:59:45 -05:00
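A minimal sketch of why the virtual destructor matters here; the class names are simplified stand-ins for the sync/async GPU split:

    #include <memory>

    class GPU {
    public:
        virtual ~GPU() = default; // without 'virtual', deleting through GPU* is undefined behavior
    };

    class GPUAsynch final : public GPU {
    public:
        ~GPUAsynch() override {
            // cleanup specific to the asynchronous variant now runs reliably
        }
    };

    int main() {
        std::unique_ptr<GPU> gpu = std::make_unique<GPUAsynch>();
        // unique_ptr deletes through the base pointer; the virtual destructor guarantees
        // ~GPUAsynch() runs before ~GPU().
    }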
bunnei
3b63a46ca4 Merge pull request #2196 from DarkLordZach/web-applet-esc
web_browser: Add shortcut to Enter key to exit applet
2019-03-07 15:32:32 -05:00
bunnei
c63a0e88b7 Merge pull request #2202 from lioncash/port-priv
kernel/client_session, kernel/server_session: Make data members private
2019-03-07 15:31:26 -05:00
bunnei
1a4d733ec7 Merge pull request #2205 from FearlessTobi/docked-undocked-hotkey
yuzu: add a hotkey to switch between undocked and docked mode
2019-03-07 11:33:24 -05:00
zhupengfei
39e895c5ff citra_qt: Settings (configuration) rework 2019-03-07 16:55:50 +01:00
bunnei
d9e9e71aec Merge pull request #2206 from lioncash/audio-stop
service/audio/audout_u: Only actually stop the audio stream in StopAudioOut if the stream is playing
2019-03-07 10:47:59 -05:00
bunnei
4f352833a5 Merge pull request #2055 from bunnei/gpu-thread
Asynchronous GPU command processing
2019-03-07 10:41:53 -05:00
Lioncash
d03ae881fd service/audio/hwopus: Move decoder state to its own class
Moves the non-multistream specific state to its own class. This will be
necessary to support the multistream variants of opus decoding.
2019-03-07 07:47:09 -05:00
Lioncash
960057cba0 service/audio/hwopus: Provide a name for the second word of OpusPacketHeader
This indicates the entropy coder's final range.
2019-03-07 05:48:35 -05:00
Lioncash
d41d85766f service/audio/hwopus: Move Opus packet header out of the IHardwareOpusDecoderManager
This will be utilized by more than just that class in the future. This
also renames it from OpusHeader to OpusPacketHeader to be more specific
about what kind of header it is.
2019-03-07 05:37:08 -05:00
Lioncash
3293877456 service/audio/hwopus: Enclose internals in an anonymous namespace
Makes it impossible to violate the ODR, as well as providing a place for
future changes.
2019-03-07 05:32:42 -05:00
Lioncash
64e7524f36 service/audio/audout_u: Only actually stop the audio stream in StopAudioOut if the stream is playing
The service itself only does further actions if the stream is playing.
If the stream is already stopped, then it just exits successfully.
2019-03-07 03:39:01 -05:00
bunnei
076c76f4e4 Merge pull request #2149 from ReinUsesLisp/decoders-style
gl_rasterizer_cache: Move format conversion functions to their own file
2019-03-06 21:56:20 -05:00
bunnei
ed0bdcc638 Merge pull request #2197 from lioncash/include
core/hle/ipc: Remove unnecessary includes
2019-03-06 21:55:16 -05:00
bunnei
84ad81ee67 gpu_thread: Fix deadlock with threading idle state check. 2019-03-06 21:48:57 -05:00
bunnei
63aa08acbe gpu_thread: (HACK) Ignore flush on FlushAndInvalidateRegion. 2019-03-06 21:48:57 -05:00
bunnei
3f1b4fb23a gpu: Always flush. 2019-03-06 21:48:57 -05:00
bunnei
aaa373585c gpu: Refactor a/synchronous implementations into their own classes. 2019-03-06 21:48:57 -05:00
bunnei
7b574f406b gpu: Move command processing to another thread. 2019-03-06 21:48:57 -05:00
bunnei
65651078e5 bootmanager: Ensure that we have a context for shader loading. 2019-03-06 21:48:56 -05:00
bunnei
d2ff93c319 Merge pull request #2190 from lioncash/ogl-global
core: Remove the global telemetry accessor function
2019-03-06 21:41:53 -05:00
bunnei
ac51d048a9 gpu: Refactor command and swap buffers interface for asynch. 2019-03-06 21:09:09 -05:00
bunnei
4483089d70 gpu: Refactor to take RendererBase instead of RasterizerInterface. 2019-03-06 21:09:09 -05:00
bunnei
d6015ee211 settings: Add new graphics setting for use_asynchronous_gpu_emulation. 2019-03-06 21:09:09 -05:00
bunnei
81e086b5ac core: Set is_powered_on before GPU is initialized. 2019-03-06 21:07:33 -05:00
bunnei
75b417489a Merge pull request #2199 from lioncash/arbiter
kernel/address_arbiter: Convert the address arbiter into a class
2019-03-06 15:55:56 -05:00
bunnei
e9b05e86b9 Merge pull request #2201 from lioncash/audio-retval
hle/service/audio/audout_u: Correct lack of return in failure case of AppendAudioOutBufferImpl()
2019-03-06 14:20:04 -05:00
bunnei
8ee78521fa Merge pull request #2204 from lioncash/wait-tree
yuzu/debugger/wait_tree: Remove use of global CurrentProcess accessor
2019-03-06 14:17:34 -05:00
fearlessTobi
c8d6f0cb82 yuzu: add a hotkey to switch between undocked and docked mode 2019-03-06 19:31:23 +01:00
Lioncash
9ac176d5a3 hle/service/audio/audout_u: Correct lack of return in failure case of AppendAudioOutBufferImpl()
Previously we were overwriting the error case with a success code
further down (which is definitely not what we should be doing here).
2019-03-06 11:44:32 -05:00
bunnei
234f00bdd4 Merge pull request #2194 from lioncash/mem
svc: Move memory range checking functions to the VMManager class
2019-03-06 11:43:07 -05:00
bunnei
5a57b1a09b Merge pull request #2200 from lioncash/audio
hle/service/audio: Extract audio error codes to a header
2019-03-06 10:52:45 -05:00
bunnei
22f105c06d Merge pull request #2203 from lioncash/engines-include
video_core/engines: Remove unnecessary includes
2019-03-06 10:51:27 -05:00
bunnei
10f08ab9ec Merge pull request #2198 from lioncash/todo
{kernel/thread, video_core/surface}: Remove obsolete TODOs
2019-03-06 10:51:03 -05:00
Lioncash
196cc82913 yuzu/debugger/wait_tree: Remove use of global CurrentProcess accessor
We already have the thread instance that was created under the current
process, so we can just pass its handle table along to retrieve the
owner of the mutex.
2019-03-05 21:52:21 -05:00
Lioncash
f9ee0dc7ee video_core/engines: Remove unnecessary includes
Removes a few unnecessary dependencies on core-related machinery, such
as core.h and memory.h, which reduces the amount of rebuilding
necessary if those files change.

This also uncovered some indirect dependencies within other source
files, which are fixed here as well.
2019-03-05 20:35:32 -05:00
Lioncash
221613d4ea kernel/server_session: Make data members private
Makes it much nicer to locally reason about server session behavior, as
parts of its functionality are no longer scattered around other classes.
2019-03-05 20:10:07 -05:00
Lioncash
7526b6fce3 kernel/client_session: Make data members private
These can be made private, as they aren't accessed in contexts that
require them to be public.
2019-03-05 20:10:03 -05:00
Lioncash
ad9dbeb44b hle/service/audio: Extract audio error codes to a header
Places all error codes in an easily includable header.

This also corrects the unsupported error code (I accidentally used the
hex value when I meant to use the decimal one).
2019-03-05 16:51:37 -05:00
Lioncash
c161389a0f kernel/address_arbiter: Pass in system instance to constructor
Allows getting rid of reliance on the global accessor functions and
instead operating on the provided system instance.
2019-03-05 15:47:03 -05:00
Lioncash
9d9676f620 kernel/address_arbiter: Minor tidying up
- Invert conditions into guard clauses where applicable.
- Mark std::vector parameter of WakeThreads as const
2019-03-05 12:58:31 -05:00
Lioncash
ec6664f6d6 kernel/address_arbiter: Convert the address arbiter into a class
Places all of the functions for address arbiter operation into a class.
This will be necessary for future deglobalizing efforts related to both
the memory and system itself.
2019-03-05 12:58:26 -05:00
Lioncash
42085ff110 video_core/surface: Remove obsolete TODO in PixelFormatFromRenderTargetFormat()
This isn't needed anymore, according to Hexagon.
2019-03-05 10:15:06 -05:00
Lioncash
79f970e6de kernel/thread: Remove obsolete TODO in Create()
This is a TODO carried over from Citra that doesn't apply here.
2019-03-05 10:05:49 -05:00
Lioncash
02bc9e9de1 core/hle/ipc: Remove unnecessary includes
Removes a few inclusion dependencies from the headers or replaces
existing ones with ones that don't indirectly include the required
headers.

This allows removing an inclusion of core/memory.h, meaning that if the
memory header is ever changed in the future, it won't result in
rebuilding the entirety of the HLE services (as the IPC headers are used
quite ubiquitously throughout the HLE service implementations).
2019-03-05 09:53:38 -05:00
bunnei
cc92c054ec Merge pull request #2185 from FearlessTobi/port-4630
Port citra-emu/citra#4630: "Memory: don't lock hle mutex in memory read/write"
2019-03-04 18:44:53 -05:00
Zach Hilman
4130b07f88 web_browser: Add shortcut to Enter key to exit applet
Addresses issues where a user in fullscreen could not exit some web applets without leaving fullscreen.
2019-03-04 18:26:28 -05:00
Lioncash
40de7f6fe8 vm_manager: Use range helpers in HeapAlloc() and HeapFree()
Significantly tidies up two guard conditionals.
2019-03-04 17:16:52 -05:00
Lioncash
6c42a23550 vm_manager: Provide address range checking functions for other memory regions
Makes the interface uniform when it comes to checking various memory
regions.
2019-03-04 17:08:55 -05:00
Lioncash
fad20213e6 kernel/scheduler: Pass in system instance in constructor
Avoids directly relying on the global system instance and instead makes
an arbitrary system instance an explicit dependency on construction.

This also allows removing dependencies on some global accessor functions
as well.
2019-03-04 17:01:37 -05:00
Lioncash
f59040d752 kernel/shared_memory: Get rid of the use of global accessor functions within Create()
Given we already pass in a reference to the kernel that the shared
memory instance is created under, we can just use that to check the
current process, rather than using the global accessor functions.

This allows removing direct dependency on the system instance entirely.
2019-03-04 16:52:36 -05:00
Lioncash
0be8fffc99 svc: Migrate address range checking functions to VMManager
Provides a bit of a more proper interface for these functions.
2019-03-04 16:32:03 -05:00
bunnei
07e13d6728 Merge pull request #2165 from ReinUsesLisp/unbind-tex
gl_rasterizer: Unbind textures but don't apply the gl_state
2019-03-04 13:51:59 -05:00
bunnei
6ad66acce2 Merge pull request #2188 from lioncash/log-static
logging/backend: Move CreateEntry into the Impl class. Relocate local static to a class variable
2019-03-04 13:46:01 -05:00
Lioncash
b114928459 core/core: Remove the global telemetry accessor function
With all usages converted off of it, this function can be removed.
2019-03-04 10:24:13 -05:00
Lioncash
319365fdf0 yuzu: Remove usage of the global telemetry accessor
In some of these cases the system object is nearby, and in the others
the long form of accessing the telemetry instance is already used, so
we can get rid of the use of the global accessor.
2019-03-04 10:24:13 -05:00
Lioncash
697a4669e1 yuzu-cmd/yuzu: Replace direct usage of the global system telemetry accessor in main()
We already have the system instance around, so we can use that instead
of the accessor.
2019-03-04 10:24:13 -05:00
Lioncash
b5f0dc95db core/core: Replace direct usage of the global system telemetry accessor from Shutdown()
The telemetry instance is actually a member of the class itself, so we
can access it directly instead of going through the global accessor.
2019-03-04 10:24:13 -05:00
Lioncash
90febaf717 video_core/renderer_opengl: Replace direct usage of global system object accessors
We already pass a reference to the system object to the constructor of the renderer,
so we can just use that instead of using the global accessor functions.
2019-03-04 10:24:09 -05:00
ReinUsesLisp
1f6571b3de maxwell_to_vk: Initial implementation 2019-03-04 04:06:05 -03:00
bunnei
be6bf37224 Merge pull request #2189 from lioncash/web
web_service: Remove unnecessary inclusions
2019-03-03 22:56:49 -05:00
Lioncash
aa30fd75cd web_service: Remove unnecessary inclusions
Reduces the potential amount of rebuilding necessary if any headers
change. In particular, we were including a header from the core library
when we don't even link the core library to the web_service library, so
this also gets rid of an indirect dependency.
2019-03-02 14:58:49 -05:00
Mat M
169d19f7b9 Merge pull request #2154 from FearlessTobi/port-4647
Port citra-emu/citra#4647: "citra_qt/main: make SPEED_LIMIT_STEP static constexpr"
2019-03-02 14:46:04 -05:00
Lioncash
f8f1ff0b4f logging/backend: Make time_origin a class variable instead of a local static
Moves local global state into the Impl class itself and initializes it
at the creation of the instance instead of in the function.

This makes it nicer for weakly-ordered architectures, given the
CreateEntry() class won't need to have atomic loads executed for each
individual call to the CreateEntry class.
2019-03-02 14:44:24 -05:00
Lioncash
43c1092031 logging/backend: Move CreateEntry into the Impl class
This function is only ever used within this source file, and moving it
makes it easier to remove static state in the following change.
2019-03-02 14:44:24 -05:00
Mat M
a461e266ea Merge pull request #2183 from ReinUsesLisp/vk-buffer-cache-clang
vk_buffer_cache: Fix clang-format
2019-03-02 14:43:15 -05:00
B3n30
71817afbe9 fixup! Joystick: Allow for background events; Add deadzone to SDLAnalog 2019-03-02 19:12:46 +01:00
Weiyi Wang
8b98f60e3c input/sdl: lock map mutex after SDL call
Any SDL invocation can call the event callback on the same thread, which can call GetSDLJoystickBySDLID and eventually cause a double lock on joystick_map_mutex. To avoid this, the lock guard should be placed as close as possible to the code accessing the object, so that any SDL invocation happens with the mutex unlocked.
2019-03-02 19:09:58 +01:00
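A condensed sketch of the locking pattern described above, with hypothetical names; the point is that the mutex only guards map access and is never held across an SDL call:

    #include <memory>
    #include <mutex>
    #include <unordered_map>

    struct Joystick {};

    std::mutex joystick_map_mutex;
    std::unordered_map<int, std::shared_ptr<Joystick>> joystick_map;

    std::shared_ptr<Joystick> GetJoystickByID(int id) {
        std::lock_guard lock{joystick_map_mutex}; // held only while touching the map
        return joystick_map[id];
    }

    void HandleDeviceEvent(int id) {
        // An SDL call here may re-enter the event callback on this same thread, which in
        // turn calls GetJoystickByID(); since the mutex is not held at this point, the
        // nested call cannot double-lock it.
        auto joystick = GetJoystickByID(id);
        (void)joystick;
    }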
James Rowe
09ac66388c Input: Remove global variables from SDL Input
Changes the interface as well to remove any unique methods that
frontends needed to call, such as StartJoystickEventHandler, by
conditionally starting the polling thread only if the frontend hasn't
started it already. Additionally, moves all global state into a single
SDLState class in order to guarantee that the destructors are called in
the proper order.
2019-03-02 19:09:34 +01:00
James Rowe
c8554d218b Input: Copy current SDL.h/cpp files to impl
This should make reviewing much easier, as you can then see what
changes happened between the old file and the new one.
2019-03-02 18:38:11 +01:00
James Rowe
2e2f6aa71a Merge pull request #2186 from honzapatCZ/patch-1
Yuzu can render 3D.
2019-03-02 10:10:01 -07:00
fearlessTobi
71c30a0a89 citra_qt/main: make SPEED_LIMIT_STEP static constexpr
MSVC does not seem to like using constexpr values in a lambda when they were declared outside of it.
Previously, on MSVC builds, the hotkeys to increase/decrease the speed limit were not working correctly because SPEED_LIMIT_STEP had garbage values inside the lambda.
After googling around a bit I found: https://github.com/codeplaysoftware/computecpp-sdk/issues/95 which seems to be a similar issue.
Trying the suggested fix to make the variable static constexpr also fixes the bug here.
2019-03-02 17:43:19 +01:00
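A condensed version of the fix being described; the value of SPEED_LIMIT_STEP here is made up for illustration:

    void ConnectSpeedLimitHotkeys() {
        // constexpr int SPEED_LIMIT_STEP = 5;     // non-static: MSVC produced garbage values
        //                                         // when the lambda used it without capture
        static constexpr int SPEED_LIMIT_STEP = 5; // static constexpr: no capture needed, the
                                                   // value is well-defined inside the lambda

        const auto increase = [](int current_limit) { return current_limit + SPEED_LIMIT_STEP; };
        (void)increase;
    }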
Nejcraft
90fd257b47 Yuzu can render 3D.
Yuzu can now render 3D graphics to some degree.
2019-03-02 17:23:05 +01:00
Weiyi Wang
5159f4eee8 Memory: don't lock hle mutex in memory read/write
The comment already invalidates itself: neither MMIO nor the rasterizer cache belongs to HLE kernel state. This mutex has too large a scope if MMIO or the cache is included, which is prone to deadlock when multiple threads acquire these resources at the same time. If necessary, each MMIO component or the rasterizer should have its own lock.
2019-03-02 15:20:05 +01:00
bunnei
3c39b39bbc Merge pull request #2182 from bunnei/my-wasted-friday
fuck git for ruining my day, I will learn but I will not forgive
2019-03-02 00:57:15 -05:00
ReinUsesLisp
8e84e81e74 vk_buffer_cache: Fix clang-format 2019-03-02 02:16:45 -03:00
bunnei
e22670fbc3 Merge pull request #2178 from ReinUsesLisp/vk-buffer-cache
vk_buffer_cache: Implement a buffer cache
2019-03-02 00:13:33 -05:00
bunnei
ab70c2583d fuck git for ruining my day, I will learn but I will not forgive 2019-03-02 00:01:34 -05:00
ReinUsesLisp
35c105a108 vk_buffer_cache: Implement a buffer cache
This buffer cache is just like OpenGL's buffer cache with some minor
style changes. It uses VKStreamBuffer.
2019-03-01 17:33:36 -03:00
bunnei
1da8a0c2a8 Merge pull request #2173 from lioncash/capture
yuzu/compatdb: Remove unused lambda capture
2019-03-01 09:55:35 -05:00
bunnei
12e74fe801 Merge pull request #2180 from lioncash/audren
service/audio: Provide an implementation of ExecuteAudioRendererRendering
2019-03-01 09:50:14 -05:00
bunnei
115fc6120c Merge pull request #2181 from lioncash/audren2
service/audio/audren_u: Implement OpenAudioRendererAuto
2019-03-01 09:49:23 -05:00
Lioncash
84aff56644 service/audio/audren_u: Implement OpenAudioRendererAuto
This currently has the same behavior as the regular
OpenAudioRenderer API function, so we can just move the code within
OpenAudioRenderer to an internal function that both service functions
call.
2019-03-01 05:40:29 -05:00
Lioncash
42dc73157c service/audio: Provide an implementation of ExecuteAudioRendererRendering
This service function appears to do nothing noteworthy on the Switch.
All it does at the moment is either return an error code or abort the
system. Given we obviously don't want to kill the system, we just opt
for always returning the error code.
2019-03-01 03:37:00 -05:00
ReinUsesLisp
e85066dac7 gl_rasterizer: Remove texture unbinding after dispatching a draw call
Unbinding was required when OpenGL delete operations didn't unbind a
resource if it was bound. This is no longer needed and can be removed.
2019-02-28 00:17:50 -03:00
ReinUsesLisp
bb3ab7d66c gl_state: Fixup multibind bug 2019-02-28 00:17:03 -03:00
bunnei
49c6d21b31 Merge pull request #2174 from lioncash/fwd
service/hid: Amend forward declaration of ServiceManager
2019-02-27 21:20:06 -05:00
bunnei
1b13859af8 Merge pull request #2152 from ReinUsesLisp/vk-stream-buffer
vk_stream_buffer: Implement a stream buffer
2019-02-27 21:19:15 -05:00
bunnei
1f5d6a8fed Merge pull request #2121 from FernandoS27/texception2
Improve the Accuracy of the Rasterizer Cache through a Texception Pass
2019-02-27 21:17:55 -05:00
bunnei
66f4fd4c81 Merge pull request #2172 from lioncash/reorder
gl_rasterizer/vk_memory_manager: Silence -Wreorder warnings
2019-02-27 21:14:20 -05:00
Fernando Sahmkow
7ea097e5c2 Devirtualize Register/Unregister and use a wrapper instead. 2019-02-27 21:58:50 -04:00
Fernando Sahmkow
5a9204dbd7 Corrections and redesign. 2019-02-27 21:58:49 -04:00
Fernando Sahmkow
d6b9b51606 Fix linux compile error. 2019-02-27 21:58:48 -04:00
Fernando Sahmkow
e64fa4d2ea Remove NotifyFrameBuffer as we are doing a texception pass every drawcall. 2019-02-27 21:58:47 -04:00
Fernando Sahmkow
3558c88442 Remove certain optimizations that caused texception to fail in certain scenarios. 2019-02-27 21:58:45 -04:00
Fernando Sahmkow
e9d84ef22c Bug fixes and formatting 2019-02-27 21:58:44 -04:00
Fernando Sahmkow
5bc82d124c rasterizer_cache_gl: Implement Texception Pass 2019-02-27 21:58:43 -04:00
Fernando Sahmkow
8932001610 rasterizer_cache_gl: Implement Partial Reinterpretation of Surfaces. 2019-02-27 21:58:40 -04:00
Fernando Sahmkow
44ea2810e4 rasterizer_cache: mark reinterpreted surfaces and add ability to reload marked surfaces on next use. 2019-02-27 21:58:39 -04:00
Fernando Sahmkow
d583fc1e97 rasterizer_cache_gl: Notify on framebuffer change 2019-02-27 21:58:37 -04:00
Fernando Sahmkow
45b6d2d349 rasterizer_cache: Expose FlushObject to Child classes and allow redefining of Register and Unregister 2019-02-27 21:57:33 -04:00
bunnei
f15e2dd881 Merge pull request #2163 from ReinUsesLisp/bitset-dirty
maxwell_3d: Use std::bitset to manage dirty flags
2019-02-27 20:50:08 -05:00
Annomatg
ef84c70d22 Speed up memory page mapping (#2141)
- Memory::MapPages total sample count was reduced from 4.6% to 1.06%.
- From main menu into the game from 1.03% to 0.35%
2019-02-27 17:22:47 -05:00
bunnei
532dda0499 Merge pull request #2176 from lioncash/com
audio_core/cubeb_sink: Ensure COM is initialized on Windows prior to calling cubeb_init
2019-02-27 17:12:06 -05:00
Lioncash
1068c1b06f audio_core/cubeb_sink: Ensure COM is initialized on Windows prior to calling cubeb_init
cubeb now requires that COM explicitly be initialized on the thread
prior to calling cubeb_init.
2019-02-27 16:14:53 -05:00
ReinUsesLisp
27ddbeb01c gl_rasterizer_cache: Create texture views for array discrepancies
When a texture is sampled in a shader with a different array mode than
the cached state, create a texture view and bind that to the shader
instead.
2019-02-27 14:41:06 -03:00
Lioncash
6335bf136f service/hid: Amend forward declaration of ServiceManager
The SM namespace is within the Service namespace, so this was forward
declaring a type that didn't exist.
2019-02-27 11:36:48 -05:00
Lioncash
456c7043bd yuzu/compatdb: Remove unused lambda capture
Silences a compiler warning with clang.
2019-02-27 11:30:36 -05:00
bunnei
42f7c11021 Merge pull request #2169 from lioncash/naming
audio_core/audio_renderer: Provide names for some parameters of AudioRendererParameter
2019-02-27 11:26:54 -05:00
bunnei
14430f7df9 Merge pull request #2170 from lioncash/emu-window
core/frontend/emu_window: Make ClipToTouchScreen a const member function
2019-02-27 11:26:24 -05:00
bunnei
eb5a3dd1c7 Merge pull request #2161 from lioncash/handle-table
kernel/handle_table: Allow process capabilities to limit the handle table size
2019-02-27 11:22:26 -05:00
bunnei
be1a1584fc Merge pull request #2168 from lioncash/cubeb
externals: Update cubeb to the master version
2019-02-27 11:20:14 -05:00
bunnei
66e023fba2 Merge pull request #2167 from lioncash/namespace
common: Move Quaternion, Rectangle, Vec2, Vec3, and Vec4 into the Common namespace
2019-02-27 11:19:53 -05:00
bunnei
b27e6ad912 Merge pull request #2171 from lioncash/pragma
gl_shader_disk_cache: Remove #pragma once from cpp file
2019-02-27 11:19:17 -05:00
Lioncash
16ea93c11e vk_memory_manager: Reorder constructor initializer list in terms of member declaration order
Reorders members in the order that they would actually be initialized
in. Silences a -Wreorder warning.
2019-02-27 11:08:19 -05:00
Lioncash
a6a783b3dc gl_rasterizer: Reorder constructor initializer list in terms of member declaration order
Orders the members in the order they would actually be initialized in.
Silences a -Wreorder warning.
2019-02-27 11:08:19 -05:00
Lioncash
e7eff72e83 gl_shader_disk_cache: Remove #pragma once from cpp file
This is only necessary in headers. Silences a warning with clang.
2019-02-27 11:02:49 -05:00
Lioncash
46b3209abb core/frontend/emu_window: Make ClipToTouchScreen a const member function
This member function doesn't modify instance state, so it can have the
const specifier applied to it.
2019-02-27 08:54:42 -05:00
Lioncash
0e1b5acc6a audio_core/audio_renderer: Name previously unknown parameters of AudioRendererParameter
Provides names for previously unknown entries (aside from the two u8
that appear to be padding bytes, and a single word that also appears
to be reserved or padding).

This will be useful in subsequent changes when unstubbing behavior related
to the audio renderer services.
2019-02-27 06:09:07 -05:00
Lioncash
b9238edd0d common/math_util: Move contents into the Common namespace
These types are within the common library, so they should be within the
Common namespace.
2019-02-27 03:38:39 -05:00
Lioncash
e56f32a071 externals: Update cubeb to 6f2420de8f155b10330cf973900ac7bdbfee589d
Keeps the audio library we use up to date.
2019-02-27 01:21:51 -05:00
Lioncash
1b855efd5e common/vector_math: Move Vec[x] types into the Common namespace
These types are within the common library, so they should be using the
Common namespace.
2019-02-26 22:38:36 -05:00
Lioncash
a1574aabd5 common/quaternion: Move Quaternion into the Common namespace
Quaternion is within the common library, so it should be using the
Common namespace.
2019-02-26 22:31:17 -05:00
Lioncash
92ea1c32d6 service/vi: Unstub GetDisplayService
This function is also supposed to check its given policy type with the
permission of the service itself. This implements the necessary
machinery to unstub these functions.

Policy::User seems to just be basic access (which is probably why vi:u
is restricted to that policy), while the other policy seems to be for
extended abilities regarding which displays can be managed and queried,
so this is assumed to be for a background compositor (which I've named,
appropriately, Policy::Compositor).
2019-02-26 20:16:23 -05:00
Lioncash
254b1e3df7 core/ipc_helper: Allow popping all signed value types with RequestParser
There's no real reason this shouldn't be allowed, given some values sent
via a request can be signed. This also makes it less annoying to work
with popping enum values, given an enum class with no type specifier
will work out of the box now.

It was also kind of an oversight to allow popping s64 values but no
other signed types.
2019-02-26 18:10:36 -05:00
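A toy sketch of the kind of relaxation being described, not yuzu's actual RequestParser: the pop just reinterprets a raw 32-bit command-buffer word as the requested integral or enum type.

    #include <cstddef>
    #include <cstdint>
    #include <type_traits>

    struct ToyRequestParser {
        const std::uint32_t* words = nullptr;
        std::size_t index = 0;

        std::uint32_t PopRaw() {
            return words[index++];
        }

        // Accept any 32-bit-or-smaller integral or enum type, signed or unsigned.
        // (A fuller version would also consume two words for 64-bit types.)
        template <typename T>
        T Pop() {
            static_assert(std::is_integral_v<T> || std::is_enum_v<T>,
                          "this sketch only handles integral and enum types");
            static_assert(sizeof(T) <= sizeof(std::uint32_t));
            return static_cast<T>(PopRaw());
        }
    };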
ReinUsesLisp
0ad3c031f4 gl_rasterizer_cache: Move format conversion to its own file 2019-02-26 20:08:27 -03:00
ReinUsesLisp
0ccd490fcd decoders: Minor style changes 2019-02-26 20:08:27 -03:00
Lioncash
1b2872eebc service/vi: Remove use of a module class
This didn't really provide much benefit here, especially since the
subsequent change requires that the behavior for each service's
GetDisplayService differs in a minor detail.

This also arguably makes the services nicer to read, since it gets rid
of an indirection in the class hierarchy.
2019-02-26 17:44:03 -05:00
bunnei
10d1d58390 Merge pull request #2164 from ReinUsesLisp/configure-blit
renderer_opengl: Update pixel format tracking
2019-02-26 12:12:10 -05:00
ReinUsesLisp
d91e35a50a renderer_opengl: Update pixel format tracking 2019-02-26 03:47:16 -03:00
ReinUsesLisp
5219edd715 maxwell_3d: Use std::bitset to manage dirty flags 2019-02-26 03:01:48 -03:00
ReinUsesLisp
730eb1dad7 vk_stream_buffer: Remove copy code path 2019-02-26 02:09:43 -03:00
ReinUsesLisp
5ca63d0675 shader/decode: Remove extras from MetaTexture 2019-02-26 00:11:30 -03:00
ReinUsesLisp
48e6f77c03 shader/decode: Split memory and texture instructions decoding 2019-02-26 00:11:30 -03:00
bunnei
c3471bf618 Merge pull request #2156 from FreddyFunk/patch-1
file_sys/vfs_vector: Fix ignored offset on Write
2019-02-25 18:28:58 -05:00
bunnei
da1b45de34 Merge pull request #2158 from lioncash/table
service/vi: Update IManagerDisplayService's function table
2019-02-25 18:27:43 -05:00
bunnei
1cffd3848b Merge pull request #2160 from lioncash/audio-warn
audio_core: Resolve compilation warnings
2019-02-25 18:25:36 -05:00
bunnei
93c1630570 Merge pull request #2159 from lioncash/warn
shader/track: Resolve variable shadowing warnings
2019-02-25 13:26:00 -05:00
Lioncash
d29f9e9709 kernel/handle_table: Mark local variables as const where applicable
Makes immutable state explicit.
2019-02-25 11:12:38 -05:00
Lioncash
5167d1577d kernel/handle_table: Allow process capabilities to limit the handle table size
The kernel allows restricting the total size of the handle table through
the process capability descriptors. Until now, this functionality wasn't
hooked up. With this, the process handle tables become properly restricted.

In the case of metadata-less executables, the handle table will assume
the maximum size is requested, preserving the behavior that existed
before these changes.
2019-02-25 11:12:32 -05:00
Lioncash
4f8cd74061 kernel/handle-table: In-class initialize data members
Directly initializes members where applicable.
2019-02-25 10:14:05 -05:00
Lioncash
0220862ba5 kernel/handle_table: Resolve truncation warnings
Avoids implicit truncation warnings from u32 -> u16 (the truncation is
desirable behavior here).
2019-02-25 09:53:21 -05:00
Lioncash
04d7b7e09d audio_core/cubeb_sink: Initialize CubebSinkStream's last_frame data member
Ensures that all member variables are initialized in a deterministic
manner across the board.
2019-02-25 09:40:37 -05:00
Lioncash
8250f9bb1c audio_core/cubeb_sink: Add override specifier to destructor
CubebSinkStream inherits from a base class with a virtual destructor, so
override can be appended to CubebSinkStream's destructor.
2019-02-25 09:38:27 -05:00
Lioncash
7cdeec20ec audio_core/cubeb_sink: Resolve variable shadowing warnings in SamplesInQueue
The name of the parameter was shadowing the member variable of the same
name. Instead, alter the name of the parameter to prevent said
shadowing.
2019-02-25 09:28:51 -05:00
Lioncash
a12f4efa2f audio_core/codec: Resolve truncation warnings within DecodeADPCM
The assignments here were performing an implicit truncation from int to
s16. Make it explicit that this is desired behavior.
2019-02-25 09:24:39 -05:00
Lioncash
c1b2e35625 shader/track: Resolve variable shadowing warnings 2019-02-25 09:10:59 -05:00
Lioncash
be7dad5e7e service/vi: Update IManagerDisplayService's function table
Amends it to add the 7.0.0+ CreateStrayLayer function.
2019-02-25 08:09:00 -05:00
bunnei
c07987dfab Merge pull request #2118 from FernandoS27/ipa-improve
shader_decompiler: Improve Accuracy of Attribute Interpolation.
2019-02-24 23:04:22 -05:00
bunnei
c4243c07cc Merge pull request #2119 from FernandoS27/fix-copy
rasterizer_cache_gl: Only do fast layered copy on the same format.
2019-02-24 23:03:52 -05:00
bunnei
c6170565b5 Merge pull request #2155 from FearlessTobi/port-4655
Port citra-emu/citra#4655: "Remove GCC version checks"
2019-02-24 23:03:13 -05:00
bunnei
57985fb16a Merge pull request #2144 from lioncash/factor
service/vi: Convert Display and Layer structs into classes
2019-02-24 23:02:50 -05:00
Frederic L
517933adcb file_sys/vfs_vector: Fix ignored offset on Write 2019-02-25 00:27:49 +01:00
tgsm
030814b1cb Remove GCC version checks
Citra can't be compiled using GCC <7 because of required C++17 support, so these version checks don't need to exist anymore.
2019-02-24 15:24:06 +01:00
bunnei
90c780e6f3 Merge pull request #2139 from degasus/dma_pusher
video_core/dma_pusher: The full list of headers at once.
2019-02-24 04:15:49 -05:00
ReinUsesLisp
33a0597603 vk_stream_buffer: Implement a stream buffer
This manages two kinds of streaming buffers: one for unified memory
models and one for dedicated GPUs. The first one skips the copy from the
staging buffer to the real buffer, since it creates a unified buffer.

This implementation waits for all fences to finish their operation
before "invalidating". This is suboptimal since it should allocate
another buffer or start searching from the beginning. There is room for
improvement here.

This could also handle AMD's "pinned" memory (a heap with 256 MiB) that
seems to be designed for buffer streaming.
2019-02-24 04:27:51 -03:00
ReinUsesLisp
281a8bf259 vk_resource_manager: Minor VKFenceWatch changes 2019-02-24 04:19:04 -03:00
bunnei
f7090bacc5 Merge pull request #2146 from ReinUsesLisp/vulkan-scheduler
vk_scheduler: Implement a scheduler
2019-02-23 23:32:43 -05:00
bunnei
d062991643 Merge pull request #2150 from ReinUsesLisp/fixup-layer-swizzle
gl_rasterizer_cache: Fixup parameter order in layered swizzle
2019-02-23 23:31:34 -05:00
bunnei
4ab978d670 Merge pull request #2151 from ReinUsesLisp/fixup-vk-memory-manager
vk_memory_manager: Fixup commit interval allocation
2019-02-23 23:29:53 -05:00
ReinUsesLisp
92050c4d86 vk_memory_manager: Fixup commit interval allocation
VKMemoryCommitImpl was using "begin + end" as the end of its interval,
which ended up wasting memory.
2019-02-24 01:04:41 -03:00
ReinUsesLisp
abef11a540 gl_rasterizer_cache: Fixup parameter order in layered swizzle 2019-02-23 23:27:30 -03:00
ReinUsesLisp
f546fb35ed vk_scheduler: Implement a scheduler
The scheduler abstracts command buffer and fence management with an
interface that's able to do OpenGL-like operations on Vulkan command
buffers.

It returns by value a command buffer and fence that have to be used for
subsequent operations until Flush or Finish is executed; after that, the
current execution context (the pair of command buffer and fence) gets
invalidated and a new one must be fetched. Thankfully, validation layers
will quickly detect if this is skipped, throwing an error due to
modifications to a submitted command buffer.
2019-02-22 01:33:32 -03:00
bunnei
94b27bb8a5 Merge pull request #2138 from ReinUsesLisp/vulkan-memory-manager
vk_memory_manager: Implement memory manager
2019-02-21 22:26:54 -05:00
Lioncash
90528f1326 service/nvflinger: Store BufferQueue instances as regular data members
The NVFlinger service is already passed into services that need to
guarantee its lifetime, so the BufferQueue instances will already live
as long as they're needed. Making them std::shared_ptr instances in this
case is unnecessary.
2019-02-21 22:09:46 -05:00
Lioncash
fd15730767 service/vi/vi_layer: Convert Layer struct into a class
Like the previous changes made to the Display struct, this prepares the
Layer struct for changes to its interface. Since Layer will be given
more invariants in the future, we convert it into a class to better
signify that.
2019-02-21 12:13:09 -05:00
Lioncash
fa4dc2cf42 service/nvflinger: Move display specifics over to vi_display
With the display and layer structures relocated to the vi service, we
can begin giving these a proper interface before beginning to properly
support the display types.

This converts the display struct into a class and provides it with the
necessary functions to preserve behavior within the NVFlinger class.
2019-02-21 12:13:04 -05:00
bunnei
9539c4203b Merge pull request #2125 from ReinUsesLisp/fixup-glstate
gl_state: Synchronize gl_state even when state is disabled
2019-02-20 21:47:46 -05:00
bunnei
ae437320c8 Merge pull request #2130 from lioncash/system_engine
video_core: Remove usages of System::GetInstance() within the engines
2019-02-20 21:24:56 -05:00
Jungy
3273f93cd5 Fixes Unicode Key File Directories (#2120)
* Fixes Unicode Key File Directories

Adds code so that when loading a file it converts to UTF16 first, to
ensure the files can be opened. Code borrowed from FileUtil::Exists.

* Update src/core/crypto/key_manager.cpp

Co-Authored-By: Jungorend <Jungorend@users.noreply.github.com>

* Update src/core/crypto/key_manager.cpp

Co-Authored-By: Jungorend <Jungorend@users.noreply.github.com>

* Using FileUtil instead to be cleaner.

* Update src/core/crypto/key_manager.cpp

Co-Authored-By: Jungorend <Jungorend@users.noreply.github.com>
2019-02-20 21:24:25 -05:00
bunnei
ef559f5741 Merge pull request #2142 from lioncash/relocate
service/nvflinger: Relocate definitions of Layer and Display to the vi service
2019-02-20 21:21:55 -05:00
Lioncash
8d5d369b54 service/nvflinger: Relocate definitions of Layer and Display to the vi service
These are more closely related to the vi service as opposed to the
intermediary nvflinger.

This also places them in their relevant subfolder, as future changes to
these will likely result in subclassing to represent various displays
and services, as they're done within the service itself on hardware.

The reasoning for prefixing the display and layer source files is to
avoid potential clashing if two files with the same name are compiled
(e.g. if 'display.cpp/.h' or 'layer.cpp/.h' is added to another service
at any point), which MSVC will actually warn against. This prevents that
case from occurring.

This also presently converts the std::array introduced within
f45c25aaba back to a std::vector to allow
the forward declaration of the Display type. Forward declaring a type
within a std::vector is allowed since the introduction of N4510
(http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4510.html) by
Zhihao Yuan.
2019-02-19 18:27:16 -05:00
Markus Wick
6dd40976d0 video_core/dma_pusher: Simplify Step() logic.
As fetching command list headers and the list of command headers is a fixed 1:1 relation now, they can be implemented within a single call.
This cleans up the Step() logic quite a bit.
2019-02-19 10:28:42 +01:00
Markus Wick
717394c980 video_core/dma_pusher: The full list of headers at once.
Fetching every u32 from memory leads to a big overhead. So let's fetch all of them as a block if possible.
This reduces the Memory::* calls by the dma_pusher by a factor of 10.
2019-02-19 09:58:38 +01:00
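A rough sketch of the batching idea, with a raw guest-memory pointer standing in for yuzu's actual memory interface:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    using CommandHeader = std::uint32_t;

    std::vector<CommandHeader> FetchCommandList(const std::uint8_t* guest_memory,
                                                std::size_t count) {
        std::vector<CommandHeader> headers(count);
        // One bulk copy instead of 'count' individual 32-bit reads, each of which would pay
        // the memory-access overhead separately.
        std::memcpy(headers.data(), guest_memory, headers.size() * sizeof(CommandHeader));
        return headers;
    }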
ReinUsesLisp
b675c97cdd vk_memory_manager: Implement memory manager
A memory manager object handles the memory allocations for a device. It
allocates chunks of Vulkan memory objects and then suballocates.
2019-02-19 03:42:28 -03:00
bunnei
4bce08d497 Merge pull request #2122 from ReinUsesLisp/vulkan-resource-manager
vk_resource_manager: Implement fence and command buffer allocator
2019-02-18 21:05:28 -05:00
bunnei
2bb02a0b78 Merge pull request #2134 from lioncash/naming
audio_core/buffer: Make const and non-const getter for samples consistent
2019-02-17 11:26:33 -05:00
Fernando Sahmkow
a8d4927e29 Corrections, documenting and fixes. 2019-02-16 16:52:24 -04:00
bunnei
e869c5ef1a Merge pull request #2133 from lioncash/arbiter
address_arbiter: Use nested namespaces where applicable
2019-02-16 15:37:21 -05:00
bunnei
4699fdca8f Merge pull request #2127 from FearlessTobi/fix-screenshot-srgb
renderer_opengl: respect the sRGB colorspace for the screenshot feature
2019-02-16 15:36:00 -05:00
bunnei
cd7e1183e2 Merge pull request #2128 from FearlessTobi/port-4197
Port citra-emu/citra#4197: "threadsafe_queue: Add PopWait and use it where possible "
2019-02-16 15:34:49 -05:00
Lioncash
b009bda67a audio_core/buffer: Make const and non-const getter for samples consistent
This way proper const/non-const selection can occur.
2019-02-16 15:21:35 -05:00
Lioncash
0113c36300 address_arbiter: Use nested namespaces where applicable
A fairly trivial change. Other sections of the codebase use nested
namespaces instead of separate namespaces here. This one must have just
been overlooked.
2019-02-16 12:41:30 -05:00
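For reference, the C++17 form the commit refers to (namespace names follow the commit's subject):

// Before: separate namespace blocks
namespace Kernel {
namespace AddressArbiter {
// ...
} // namespace AddressArbiter
} // namespace Kernel

// After: a single nested namespace definition (C++17)
namespace Kernel::AddressArbiter {
// ...
} // namespace Kernel::AddressArbiter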
Lioncash
a8fa5019b5 video_core: Remove usages of System::GetInstance() within the engines
Avoids the use of the global accessor in favor of explicitly making the
system a dependency within the interface.
2019-02-15 22:06:23 -05:00
Fernando Sahmkow
ecccfe0337 Use u128 on Clock Cycles calculation. 2019-02-15 22:57:16 -04:00
Fernando Sahmkow
3ea48e8ebe Implement 128-bit Unsigned Integer Multiplication and Division. 2019-02-15 22:55:31 -04:00
Fernando Sahmkow
5b7ec71fb7 Correct CNTPCT to use Clock Cycles instead of Cpu Cycles. 2019-02-15 22:55:29 -04:00
James Rowe
99da6362c4 Merge pull request #2123 from lioncash/coretiming-global
core_timing: De-globalize core_timing facilities
2019-02-15 19:52:11 -07:00
Lioncash
bd983414f6 core_timing: Convert core timing into a class
Gets rid of the largest set of mutable global state within the core.
This also paves a way for eliminating usages of GetInstance() on the
System class as a follow-up.

Note that no behavioral changes have been made, and this simply extracts
the functionality into a class. This also has the benefit of making
dependencies on the core timing functionality explicit within the
relevant interfaces.
2019-02-15 21:50:25 -05:00
B3n30
2195f10d15 Addressed review comments 2019-02-15 22:14:54 +01:00
B3n30
4154936568 threadsafe_queue: Add WaitIfEmpty and use it in logging 2019-02-15 22:12:54 +01:00
fearlessTobi
9a56b99fa4 renderer_opengl: respect the sRGB colorspace for the screenshot feature
Previously, screenshots completely ignored whether the game uses RGB or sRGB.
This resulted in screenshot colors that looked off for some titles.
2019-02-15 21:27:29 +01:00
ReinUsesLisp
8dfc81239f gl_state: Synchronize gl_state even when state is disabled
There are some potential edge cases where gl_state may fail to track the
state if a related state changes while the toggle is disabled, or if it did
not change at all. This addresses that.
2019-02-15 01:30:14 -03:00
bunnei
4327f430f1 Merge pull request #2112 from lioncash/shadowing
gl_rasterizer_cache: Get rid of variable shadowing
2019-02-14 21:45:20 -05:00
bunnei
a8fc5d6edd Merge pull request #2111 from ReinUsesLisp/fetch-fix
gl_shader_decompiler: Re-implement TLDS lod
2019-02-14 21:42:34 -05:00
ReinUsesLisp
ae6c052ed9 vk_resource_manager: Implement a command buffer pool with VKFencedPool 2019-02-14 18:44:26 -03:00
ReinUsesLisp
a2b6de7e9f vk_resource_manager: Add VKFencedPool interface
Handles a pool of resources protected by fences. Manages resource
overflow by allocating more resources.

This class is intended to be used through inheritance.
2019-02-14 18:44:26 -03:00
ReinUsesLisp
0ffdd0a683 vk_resource_manager: Implement VKResourceManager and fence allocator
CommitFence iterates a pool of fences until a free one is found. If all fences
are in use at the same time, more are allocated.
2019-02-14 18:44:26 -03:00
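A hedged sketch of that commit loop (member and helper names are assumptions based on the description above, not the actual header):

// Scan the fence pool for a free entry; if every fence is busy, grow the pool.
VKFence& VKResourceManager::CommitFence() {
    for (auto& fence : fences) {
        if (fence->TryCommit()) { // assumed: returns false while the fence is still in use
            return *fence;
        }
    }
    // All fences are in use at the same time: allocate more.
    const std::size_t old_size = fences.size();
    GrowFences(old_size); // assumed helper that appends `old_size` fresh fences
    VKFence& fence = *fences[old_size];
    fence.Commit(); // assumed: unconditionally marks the fresh fence as committed
    return fence;
}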
ReinUsesLisp
aa0b6babda vk_resource_manager: Implement VKFenceWatch
A fence watch is used to keep track of the usage of a fence and protect
a resource or set of resources without having to inherit from their
handlers.
2019-02-14 18:44:26 -03:00
ReinUsesLisp
25c2fe1c6b vk_resource_manager: Implement VKFence
Fences take ownership of objects, protecting them from GPU-side or
driver-side concurrent access. They must be committed from the resource
manager. Their usage flow is: commit the fence from the resource
manager, protect resources with it and use them, send the fence to an
execution queue, Wait for it if needed, and then call Release. Used
resources will automatically be signaled when they are free to be
reused.
2019-02-14 18:44:26 -03:00
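The usage flow described above, as a hedged sketch (the exact signatures are assumptions; only the flow follows the commit message):

// 1. Commit a fence from the resource manager.
VKFence& fence = resource_manager.CommitFence();

// 2. Protect the resources the submitted work will touch, then use them.
fence.Protect(&staging_buffer); // assumed to take a VKResource*
SubmitCommands(queue, fence);   // hypothetical: sends the fence along with the work

// 3. Wait for it if needed, then call Release. Protected resources are signaled
//    automatically once they are free to be reused.
fence.Wait();
fence.Release();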
ReinUsesLisp
33a4cebc22 vk_resource_manager: Add VKResource interface
VKResource is an interface that gets signaled by a fence when it is free
to be reused.
2019-02-14 18:36:15 -03:00
bunnei
fcc3aa0bbf Merge pull request #2113 from ReinUsesLisp/vulkan-base
vulkan: Add dependencies and device abstraction
2019-02-14 10:06:48 -05:00
Fernando Sahmkow
10682ad7e0 shader_decompiler: Improve Accuracy of Attribute Interpolation. 2019-02-14 03:25:07 -04:00
bunnei
8490e7746a Merge pull request #2115 from lioncash/local
core_timing: Make EmptyTimedCallback a local variable
2019-02-13 21:42:04 -05:00
bunnei
f0c4ac9abd Merge pull request #2116 from lioncash/size
threadsafe_queue: Remove NeedSize template parameter
2019-02-13 21:41:25 -05:00
Fernando Sahmkow
bb41683394 rasterizer_cache_gl: Only do fast layered copy on the same format, as
glCopyImageSubData does not support different formats.
2019-02-13 16:55:00 -04:00
Lioncash
0829ef97ca threadsafe_queue: Use std::size_t for representing size
Makes it consistent with the regular standard containers in terms of
size representation. This also gets rid of dependence on our own
type aliases, removing the need for an include.
2019-02-12 22:39:53 -05:00
Lioncash
f0bfb24c61 threadsafe_queue: Remove NeedSize template parameter
The necessity of this parameter is dubious at best, and in 2019 it probably
offers completely negligible savings compared to just leaving the size
tracking enabled. This removes it and simplifies the overall interface.
2019-02-12 22:09:51 -05:00
Lioncash
83ba3515ec core_timing: Make EmptyTimedCallback a local variable
Given this is only used in one place, it can be moved closer to its
usage site.
2019-02-12 21:47:18 -05:00
ReinUsesLisp
8beca060d1 vk_device: Abstract device handling into a class
VKDevice contains all the data required to manage and initialize a
physical device. Its intention is to be passed across Vulkan objects to
query device-specific data (for example the logical device and the
dispatch loader).
2019-02-12 21:43:02 -03:00
Lioncash
054e39647c gl_rasterizer_cache: Remove unnecessary newline 2019-02-12 16:56:19 -05:00
Lioncash
e25c464c02 gl_rasterizer_cache: Get rid of variable shadowing
Avoids shadowing the members of the struct itself, which results in a
-Wshadow warning.
2019-02-12 16:46:39 -05:00
ReinUsesLisp
18fe910957 renderer_vulkan: Add declarations file
This file is intended to be included instead of vulkan/vulkan.hpp. It
includes declarations of unique handles using a dynamic dispatcher
instead of a static one (which would require linking to a Vulkan
library).
2019-02-12 18:33:02 -03:00
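A hedged sketch of what such a header can look like with vulkan.hpp, assuming a version that provides vk::DispatchLoaderDynamic (the alias names below are illustrative, not the actual file):

// declarations.h-style sketch: unique handles bound to a dynamic dispatcher,
// so no Vulkan loader library has to be linked at build time.
#include <vulkan/vulkan.hpp>

namespace Vulkan {

template <typename T>
using UniqueHandle = vk::UniqueHandle<T, vk::DispatchLoaderDynamic>;

using UniqueDevice = UniqueHandle<vk::Device>;
using UniqueFence = UniqueHandle<vk::Fence>;
using UniqueCommandPool = UniqueHandle<vk::CommandPool>;

} // namespace Vulkan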
ReinUsesLisp
b12ab4d805 logging: Add Vulkan backend logging class type 2019-02-12 18:33:02 -03:00
ReinUsesLisp
cc94a6d101 cmake: Add Vulkan option 2019-02-12 18:33:02 -03:00
ReinUsesLisp
afb8af9853 gitmodules: Add Vulkan headers dependency 2019-02-12 18:33:02 -03:00
ReinUsesLisp
e60d4d70bc gl_shader_decompiler: Re-implement TLDS lod 2019-02-12 17:03:07 -03:00
257 changed files with 8518 additions and 4098 deletions

3
.gitmodules vendored
View File

@@ -37,3 +37,6 @@
[submodule "discord-rpc"]
path = externals/discord-rpc
url = https://github.com/discordapp/discord-rpc.git
[submodule "Vulkan-Headers"]
path = externals/Vulkan-Headers
url = https://github.com/KhronosGroup/Vulkan-Headers.git

View File

@@ -24,7 +24,7 @@ matrix:
- os: osx
env: NAME="macos build"
sudo: false
osx_image: xcode10
osx_image: xcode10.1
install: "./.travis/macos/deps.sh"
script: "./.travis/macos/build.sh"
after_success: "./.travis/macos/upload.sh"

View File

@@ -2,7 +2,7 @@
set -o pipefail
export MACOSX_DEPLOYMENT_TARGET=10.13
export MACOSX_DEPLOYMENT_TARGET=10.14
export Qt5_DIR=$(brew --prefix)/opt/qt5
export UNICORNDIR=$(pwd)/externals/unicorn
export PATH="/usr/local/opt/ccache/libexec:$PATH"

View File

@@ -23,6 +23,8 @@ option(YUZU_USE_QT_WEB_ENGINE "Use QtWebEngine for web applet implementation" OF
option(ENABLE_CUBEB "Enables the cubeb audio backend" ON)
option(ENABLE_VULKAN "Enables Vulkan backend" ON)
option(USE_DISCORD_PRESENCE "Enables Discord Rich Presence" OFF)
if(NOT EXISTS ${PROJECT_SOURCE_DIR}/.git/hooks/pre-commit)
@@ -161,12 +163,6 @@ else()
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE)
endif()
# Fix GCC C++17 and Boost.ICL incompatibility (needed to build dynarmic)
# See https://bugzilla.redhat.com/show_bug.cgi?id=1485641#c1
if (CMAKE_COMPILER_IS_GNUCC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-new-ttp-matching")
endif()
# Set file offset size to 64 bits.
#
# On modern Unixes, this is typically already the case. The lone exception is
@@ -183,9 +179,9 @@ set_property(DIRECTORY APPEND PROPERTY
# System imported libraries
# ======================
find_package(Boost 1.63.0 QUIET)
find_package(Boost 1.66.0 QUIET)
if (NOT Boost_FOUND)
message(STATUS "Boost 1.63.0 or newer not found, falling back to externals")
message(STATUS "Boost 1.66.0 or newer not found, falling back to externals")
set(BOOST_ROOT "${PROJECT_SOURCE_DIR}/externals/boost")
set(Boost_NO_SYSTEM_PATHS OFF)

View File

@@ -73,6 +73,7 @@ set(HASH_FILES
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/memory.cpp"
"${VIDEO_CORE}/shader/decode/texture.cpp"
"${VIDEO_CORE}/shader/decode/other.cpp"
"${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"

View File

@@ -7,7 +7,7 @@ yuzu is an experimental open-source emulator for the Nintendo Switch from the cr
It is written in C++ with portability in mind, with builds actively maintained for Windows, Linux and macOS. The emulator is currently only useful for homebrew development and research purposes.
yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success, but does not implement any of the necessary GPU features to render 3D graphics.
yuzu only emulates a subset of Switch hardware and therefore is generally only useful for running/debugging homebrew applications. At this time, yuzu cannot play any commercial games without major problems. yuzu can boot some games, to varying degrees of success.
yuzu is licensed under the GPLv2 (or any later version). Refer to the license.txt file included.

1
externals/Vulkan-Headers vendored Submodule

2
externals/opus vendored

View File

@@ -26,14 +26,15 @@ static Stream::Format ChannelsToStreamFormat(u32 num_channels) {
return {};
}
StreamPtr AudioOut::OpenStream(u32 sample_rate, u32 num_channels, std::string&& name,
StreamPtr AudioOut::OpenStream(Core::Timing::CoreTiming& core_timing, u32 sample_rate,
u32 num_channels, std::string&& name,
Stream::ReleaseCallback&& release_callback) {
if (!sink) {
sink = CreateSinkFromID(Settings::values.sink_id, Settings::values.audio_device_id);
}
return std::make_shared<Stream>(
sample_rate, ChannelsToStreamFormat(num_channels), std::move(release_callback),
core_timing, sample_rate, ChannelsToStreamFormat(num_channels), std::move(release_callback),
sink->AcquireSinkStream(sample_rate, num_channels, name), std::move(name));
}

View File

@@ -13,6 +13,10 @@
#include "audio_core/stream.h"
#include "common/common_types.h"
namespace Core::Timing {
class CoreTiming;
}
namespace AudioCore {
/**
@@ -21,8 +25,8 @@ namespace AudioCore {
class AudioOut {
public:
/// Opens a new audio stream
StreamPtr OpenStream(u32 sample_rate, u32 num_channels, std::string&& name,
Stream::ReleaseCallback&& release_callback);
StreamPtr OpenStream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, u32 num_channels,
std::string&& name, Stream::ReleaseCallback&& release_callback);
/// Returns a vector of recently released buffers specified by tag for the specified stream
std::vector<Buffer::Tag> GetTagsAndReleaseBuffers(StreamPtr stream, std::size_t max_count);

View File

@@ -8,6 +8,7 @@
#include "audio_core/codec.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/kernel/writable_event.h"
#include "core/memory.h"
@@ -71,14 +72,14 @@ private:
EffectOutStatus out_status{};
EffectInStatus info{};
};
AudioRenderer::AudioRenderer(AudioRendererParameter params,
AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
Kernel::SharedPtr<Kernel::WritableEvent> buffer_event)
: worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count),
effects(params.effect_count) {
audio_out = std::make_unique<AudioCore::AudioOut>();
stream = audio_out->OpenStream(STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS, "AudioRenderer",
[=]() { buffer_event->Signal(); });
stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS,
"AudioRenderer", [=]() { buffer_event->Signal(); });
audio_out->StartStream(stream);
QueueMixedBuffer(0);

View File

@@ -14,6 +14,10 @@
#include "common/swap.h"
#include "core/hle/kernel/object.h"
namespace Core::Timing {
class CoreTiming;
}
namespace Kernel {
class WritableEvent;
}
@@ -42,16 +46,18 @@ struct AudioRendererParameter {
u32_le sample_rate;
u32_le sample_count;
u32_le mix_buffer_count;
u32_le unknown_c;
u32_le submix_count;
u32_le voice_count;
u32_le sink_count;
u32_le effect_count;
u32_le unknown_1c;
u8 unknown_20;
INSERT_PADDING_BYTES(3);
u32_le performance_frame_count;
u8 is_voice_drop_enabled;
u8 unknown_21;
u8 unknown_22;
u8 execution_mode;
u32_le splitter_count;
u32_le unknown_2c;
INSERT_PADDING_WORDS(1);
u32_le num_splitter_send_channels;
u32_le unknown_30;
u32_le revision;
};
static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size");
@@ -208,7 +214,7 @@ static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size
class AudioRenderer {
public:
AudioRenderer(AudioRendererParameter params,
AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
Kernel::SharedPtr<Kernel::WritableEvent> buffer_event);
~AudioRenderer();

View File

@@ -21,7 +21,7 @@ public:
Buffer(Tag tag, std::vector<s16>&& samples) : tag{tag}, samples{std::move(samples)} {}
/// Returns the raw audio data for the buffer
std::vector<s16>& Samples() {
std::vector<s16>& GetSamples() {
return samples;
}

View File

@@ -68,8 +68,8 @@ std::vector<s16> DecodeADPCM(const u8* const data, std::size_t size, const ADPCM
}
}
state.yn1 = yn1;
state.yn2 = yn2;
state.yn1 = static_cast<s16>(yn1);
state.yn2 = static_cast<s16>(yn2);
return ret;
}

View File

@@ -12,6 +12,10 @@
#include "common/ring_buffer.h"
#include "core/settings.h"
#ifdef _WIN32
#include <objbase.h>
#endif
namespace AudioCore {
class CubebSinkStream final : public SinkStream {
@@ -46,7 +50,7 @@ public:
}
}
~CubebSinkStream() {
~CubebSinkStream() override {
if (!ctx) {
return;
}
@@ -75,11 +79,11 @@ public:
queue.Push(samples);
}
std::size_t SamplesInQueue(u32 num_channels) const override {
std::size_t SamplesInQueue(u32 channel_count) const override {
if (!ctx)
return 0;
return queue.Size() / num_channels;
return queue.Size() / channel_count;
}
void Flush() override {
@@ -98,7 +102,7 @@ private:
u32 num_channels{};
Common::RingBuffer<s16, 0x10000> queue;
std::array<s16, 2> last_frame;
std::array<s16, 2> last_frame{};
std::atomic<bool> should_flush{};
TimeStretcher time_stretch;
@@ -108,6 +112,11 @@ private:
};
CubebSink::CubebSink(std::string_view target_device_name) {
// Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
#ifdef _WIN32
com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
#endif
if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) {
LOG_CRITICAL(Audio_Sink, "cubeb_init failed");
return;
@@ -142,6 +151,12 @@ CubebSink::~CubebSink() {
}
cubeb_destroy(ctx);
#ifdef _WIN32
if (SUCCEEDED(com_init_result)) {
CoUninitialize();
}
#endif
}
SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,

View File

@@ -25,6 +25,10 @@ private:
cubeb* ctx{};
cubeb_devid output_device{};
std::vector<SinkStreamPtr> sink_streams;
#ifdef _WIN32
u32 com_init_result = 0;
#endif
};
std::vector<std::string> ListCubebSinkDevices();

View File

@@ -32,12 +32,12 @@ u32 Stream::GetNumChannels() const {
return {};
}
Stream::Stream(u32 sample_rate, Format format, ReleaseCallback&& release_callback,
SinkStream& sink_stream, std::string&& name_)
Stream::Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format format,
ReleaseCallback&& release_callback, SinkStream& sink_stream, std::string&& name_)
: sample_rate{sample_rate}, format{format}, release_callback{std::move(release_callback)},
sink_stream{sink_stream}, name{std::move(name_)} {
sink_stream{sink_stream}, core_timing{core_timing}, name{std::move(name_)} {
release_event = Core::Timing::RegisterEvent(
release_event = core_timing.RegisterEvent(
name, [this](u64 userdata, int cycles_late) { ReleaseActiveBuffer(); });
}
@@ -95,12 +95,11 @@ void Stream::PlayNextBuffer() {
active_buffer = queued_buffers.front();
queued_buffers.pop();
VolumeAdjustSamples(active_buffer->Samples());
VolumeAdjustSamples(active_buffer->GetSamples());
sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
Core::Timing::ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event,
{});
core_timing.ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {});
}
void Stream::ReleaseActiveBuffer() {

View File

@@ -14,8 +14,9 @@
#include "common/common_types.h"
namespace Core::Timing {
class CoreTiming;
struct EventType;
}
} // namespace Core::Timing
namespace AudioCore {
@@ -42,8 +43,8 @@ public:
/// Callback function type, used to change guest state on a buffer being released
using ReleaseCallback = std::function<void()>;
Stream(u32 sample_rate, Format format, ReleaseCallback&& release_callback,
SinkStream& sink_stream, std::string&& name_);
Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format format,
ReleaseCallback&& release_callback, SinkStream& sink_stream, std::string&& name_);
/// Plays the audio stream
void Play();
@@ -100,6 +101,7 @@ private:
std::queue<BufferPtr> queued_buffers; ///< Buffers queued to be played in the stream
std::queue<BufferPtr> released_buffers; ///< Buffers recently released from the stream
SinkStream& sink_stream; ///< Output sink for the stream
Core::Timing::CoreTiming& core_timing; ///< Core timing instance.
std::string name; ///< Name of the stream, must be unique
};

View File

@@ -47,6 +47,7 @@ add_custom_command(OUTPUT scm_rev.cpp
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/memory.cpp"
"${VIDEO_CORE}/shader/decode/texture.cpp"
"${VIDEO_CORE}/shader/decode/other.cpp"
"${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
@@ -91,10 +92,14 @@ add_library(common STATIC
logging/text_formatter.cpp
logging/text_formatter.h
math_util.h
memory_hook.cpp
memory_hook.h
microprofile.cpp
microprofile.h
microprofileui.h
misc.cpp
page_table.cpp
page_table.h
param_package.cpp
param_package.h
quaternion.h
@@ -113,6 +118,8 @@ add_library(common STATIC
threadsafe_queue.h
timer.cpp
timer.h
uint128.cpp
uint128.h
vector_math.h
web_result.h
)

View File

@@ -111,12 +111,6 @@
template <std::size_t Position, std::size_t Bits, typename T>
struct BitField {
private:
// We hide the copy assigment operator here, because the default copy
// assignment would copy the full storage value, rather than just the bits
// relevant to this particular bit field.
// We don't delete it because we want BitField to be trivially copyable.
constexpr BitField& operator=(const BitField&) = default;
// UnderlyingType is T for non-enum types and the underlying type of T if
// T is an enumeration. Note that T is wrapped within an enable_if in the
// former case to workaround compile errors which arise when using
@@ -163,9 +157,13 @@ public:
BitField(T val) = delete;
BitField& operator=(T val) = delete;
// Force default constructor to be created
// so that we can use this within unions
constexpr BitField() = default;
constexpr BitField() noexcept = default;
constexpr BitField(const BitField&) noexcept = default;
constexpr BitField& operator=(const BitField&) noexcept = default;
constexpr BitField(BitField&&) noexcept = default;
constexpr BitField& operator=(BitField&&) noexcept = default;
constexpr FORCE_INLINE operator T() const {
return Value();

View File

@@ -55,36 +55,36 @@ constexpr u8 Convert8To6(u8 value) {
/**
* Decode a color stored in RGBA8 format
* @param bytes Pointer to encoded source color
* @return Result color decoded as Math::Vec4<u8>
* @return Result color decoded as Common::Vec4<u8>
*/
inline Math::Vec4<u8> DecodeRGBA8(const u8* bytes) {
inline Common::Vec4<u8> DecodeRGBA8(const u8* bytes) {
return {bytes[3], bytes[2], bytes[1], bytes[0]};
}
/**
* Decode a color stored in RGB8 format
* @param bytes Pointer to encoded source color
* @return Result color decoded as Math::Vec4<u8>
* @return Result color decoded as Common::Vec4<u8>
*/
inline Math::Vec4<u8> DecodeRGB8(const u8* bytes) {
inline Common::Vec4<u8> DecodeRGB8(const u8* bytes) {
return {bytes[2], bytes[1], bytes[0], 255};
}
/**
* Decode a color stored in RG8 (aka HILO8) format
* @param bytes Pointer to encoded source color
* @return Result color decoded as Math::Vec4<u8>
* @return Result color decoded as Common::Vec4<u8>
*/
inline Math::Vec4<u8> DecodeRG8(const u8* bytes) {
inline Common::Vec4<u8> DecodeRG8(const u8* bytes) {
return {bytes[1], bytes[0], 0, 255};
}
/**
* Decode a color stored in RGB565 format
* @param bytes Pointer to encoded source color
* @return Result color decoded as Math::Vec4<u8>
* @return Result color decoded as Common::Vec4<u8>
*/
inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
inline Common::Vec4<u8> DecodeRGB565(const u8* bytes) {
u16_le pixel;
std::memcpy(&pixel, bytes, sizeof(pixel));
return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F),
@@ -94,9 +94,9 @@ inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
/**
* Decode a color stored in RGB5A1 format
* @param bytes Pointer to encoded source color
* @return Result color decoded as Math::Vec4<u8>
* @return Result color decoded as Common::Vec4<u8>
*/
inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
inline Common::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
u16_le pixel;
std::memcpy(&pixel, bytes, sizeof(pixel));
return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F),
@@ -106,9 +106,9 @@ inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
/**
* Decode a color stored in RGBA4 format
* @param bytes Pointer to encoded source color
* @return Result color decoded as Math::Vec4<u8>
* @return Result color decoded as Common::Vec4<u8>
*/
inline Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
inline Common::Vec4<u8> DecodeRGBA4(const u8* bytes) {
u16_le pixel;
std::memcpy(&pixel, bytes, sizeof(pixel));
return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF),
@@ -138,9 +138,9 @@ inline u32 DecodeD24(const u8* bytes) {
/**
* Decode a depth value and a stencil value stored in D24S8 format
* @param bytes Pointer to encoded source values
* @return Resulting values stored as a Math::Vec2
* @return Resulting values stored as a Common::Vec2
*/
inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
inline Common::Vec2<u32> DecodeD24S8(const u8* bytes) {
return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]};
}
@@ -149,7 +149,7 @@ inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
* @param color Source color to encode
* @param bytes Destination pointer to store encoded color
*/
inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
inline void EncodeRGBA8(const Common::Vec4<u8>& color, u8* bytes) {
bytes[3] = color.r();
bytes[2] = color.g();
bytes[1] = color.b();
@@ -161,7 +161,7 @@ inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
* @param color Source color to encode
* @param bytes Destination pointer to store encoded color
*/
inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
inline void EncodeRGB8(const Common::Vec4<u8>& color, u8* bytes) {
bytes[2] = color.r();
bytes[1] = color.g();
bytes[0] = color.b();
@@ -172,7 +172,7 @@ inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
* @param color Source color to encode
* @param bytes Destination pointer to store encoded color
*/
inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) {
bytes[1] = color.r();
bytes[0] = color.g();
}
@@ -181,7 +181,7 @@ inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
* @param color Source color to encode
* @param bytes Destination pointer to store encoded color
*/
inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
inline void EncodeRGB565(const Common::Vec4<u8>& color, u8* bytes) {
const u16_le data =
(Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b());
@@ -193,7 +193,7 @@ inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
* @param color Source color to encode
* @param bytes Destination pointer to store encoded color
*/
inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
inline void EncodeRGB5A1(const Common::Vec4<u8>& color, u8* bytes) {
const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) |
(Convert8To5(color.b()) << 1) | Convert8To1(color.a());
@@ -205,7 +205,7 @@ inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
* @param color Source color to encode
* @param bytes Destination pointer to store encoded color
*/
inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) {
inline void EncodeRGBA4(const Common::Vec4<u8>& color, u8* bytes) {
const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) |
(Convert8To4(color.b()) << 4) | Convert8To4(color.a());

View File

@@ -39,10 +39,10 @@ public:
Impl(Impl const&) = delete;
const Impl& operator=(Impl const&) = delete;
void PushEntry(Entry e) {
std::lock_guard<std::mutex> lock(message_mutex);
message_queue.Push(std::move(e));
message_cv.notify_one();
void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num,
const char* function, std::string message) {
message_queue.Push(
CreateEntry(log_class, log_level, filename, line_num, function, std::move(message)));
}
void AddBackend(std::unique_ptr<Backend> backend) {
@@ -86,15 +86,13 @@ private:
}
};
while (true) {
{
std::unique_lock<std::mutex> lock(message_mutex);
message_cv.wait(lock, [&] { return !running || message_queue.Pop(entry); });
}
if (!running) {
entry = message_queue.PopWait();
if (entry.final_entry) {
break;
}
write_logs(entry);
}
// Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a case
// where a system is repeatedly spamming logs even on close.
const int MAX_LOGS_TO_WRITE = filter.IsDebug() ? INT_MAX : 100;
@@ -106,18 +104,36 @@ private:
}
~Impl() {
running = false;
message_cv.notify_one();
Entry entry;
entry.final_entry = true;
message_queue.Push(entry);
backend_thread.join();
}
std::atomic_bool running{true};
std::mutex message_mutex, writing_mutex;
std::condition_variable message_cv;
Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
const char* function, std::string message) const {
using std::chrono::duration_cast;
using std::chrono::steady_clock;
Entry entry;
entry.timestamp =
duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
entry.log_class = log_class;
entry.log_level = log_level;
entry.filename = Common::TrimSourcePath(filename);
entry.line_num = line_nr;
entry.function = function;
entry.message = std::move(message);
return entry;
}
std::mutex writing_mutex;
std::thread backend_thread;
std::vector<std::unique_ptr<Backend>> backends;
Common::MPSCQueue<Log::Entry> message_queue;
Filter filter;
std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
};
void ConsoleBackend::Write(const Entry& entry) {
@@ -232,6 +248,7 @@ void DebuggerBackend::Write(const Entry& entry) {
CLS(Render) \
SUB(Render, Software) \
SUB(Render, OpenGL) \
SUB(Render, Vulkan) \
CLS(Audio) \
SUB(Audio, DSP) \
SUB(Audio, Sink) \
@@ -275,25 +292,6 @@ const char* GetLevelName(Level log_level) {
#undef LVL
}
Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
const char* function, std::string message) {
using std::chrono::duration_cast;
using std::chrono::steady_clock;
static steady_clock::time_point time_origin = steady_clock::now();
Entry entry;
entry.timestamp = duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
entry.log_class = log_class;
entry.log_level = log_level;
entry.filename = Common::TrimSourcePath(filename);
entry.line_num = line_nr;
entry.function = function;
entry.message = std::move(message);
return entry;
}
void SetGlobalFilter(const Filter& filter) {
Impl::Instance().SetGlobalFilter(filter);
}
@@ -318,9 +316,7 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
if (!filter.CheckMessage(log_class, log_level))
return;
Entry entry =
CreateEntry(log_class, log_level, filename, line_num, function, fmt::vformat(format, args));
instance.PushEntry(std::move(entry));
instance.PushEntry(log_class, log_level, filename, line_num, function,
fmt::vformat(format, args));
}
} // namespace Log

View File

@@ -27,6 +27,7 @@ struct Entry {
unsigned int line_num;
std::string function;
std::string message;
bool final_entry = false;
Entry() = default;
Entry(Entry&& o) = default;
@@ -134,10 +135,6 @@ const char* GetLogClassName(Class log_class);
*/
const char* GetLevelName(Level log_level);
/// Creates a log entry by formatting the given source location, and message.
Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
const char* function, std::string message);
/**
* The global filter will prevent any messages from even being processed if they are filtered. Each
* backend can have a filter, but if the level is lower than the global filter, the backend will

View File

@@ -112,6 +112,7 @@ enum class Class : ClassType {
Render, ///< Emulator video output and hardware acceleration
Render_Software, ///< Software renderer backend
Render_OpenGL, ///< OpenGL backend
Render_Vulkan, ///< Vulkan backend
Audio, ///< Audio emulation
Audio_DSP, ///< The HLE implementation of the DSP
Audio_Sink, ///< Emulator audio output backend

View File

@@ -7,7 +7,7 @@
#include <cstdlib>
#include <type_traits>
namespace MathUtil {
namespace Common {
constexpr float PI = 3.14159265f;
@@ -41,4 +41,4 @@ struct Rectangle {
}
};
} // namespace MathUtil
} // namespace Common

View File

@@ -2,10 +2,10 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/memory_hook.h"
#include "common/memory_hook.h"
namespace Memory {
namespace Common {
MemoryHook::~MemoryHook() = default;
} // namespace Memory
} // namespace Common

View File

@@ -9,7 +9,7 @@
#include "common/common_types.h"
namespace Memory {
namespace Common {
/**
* Memory hooks have two purposes:
@@ -44,4 +44,4 @@ public:
};
using MemoryHookPointer = std::shared_ptr<MemoryHook>;
} // namespace Memory
} // namespace Common

29
src/common/page_table.cpp Normal file
View File

@@ -0,0 +1,29 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/page_table.h"
namespace Common {
PageTable::PageTable(std::size_t page_size_in_bits) : page_size_in_bits{page_size_in_bits} {}
PageTable::~PageTable() = default;
void PageTable::Resize(std::size_t address_space_width_in_bits) {
const std::size_t num_page_table_entries = 1ULL
<< (address_space_width_in_bits - page_size_in_bits);
pointers.resize(num_page_table_entries);
attributes.resize(num_page_table_entries);
// The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
// vector size is subsequently decreased (via resize), the vector might not automatically
// actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for
// 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use.
pointers.shrink_to_fit();
attributes.shrink_to_fit();
}
} // namespace Common

80
src/common/page_table.h Normal file
View File

@@ -0,0 +1,80 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vector>
#include <boost/icl/interval_map.hpp>
#include "common/common_types.h"
#include "common/memory_hook.h"
namespace Common {
enum class PageType : u8 {
/// Page is unmapped and should cause an access error.
Unmapped,
/// Page is mapped to regular memory. This is the only type you can get pointers to.
Memory,
/// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
/// invalidation
RasterizerCachedMemory,
/// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
Special,
};
struct SpecialRegion {
enum class Type {
DebugHook,
IODevice,
} type;
MemoryHookPointer handler;
bool operator<(const SpecialRegion& other) const {
return std::tie(type, handler) < std::tie(other.type, other.handler);
}
bool operator==(const SpecialRegion& other) const {
return std::tie(type, handler) == std::tie(other.type, other.handler);
}
};
/**
* A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
* mimics the way a real CPU page table works.
*/
struct PageTable {
explicit PageTable(std::size_t page_size_in_bits);
~PageTable();
/**
* Resizes the page table to be able to accommodate enough pages within
* a given address space.
*
* @param address_space_width_in_bits The address size width in bits.
*/
void Resize(std::size_t address_space_width_in_bits);
/**
* Vector of memory pointers backing each page. An entry can only be non-null if the
* corresponding entry in the `attributes` vector is of type `Memory`.
*/
std::vector<u8*> pointers;
/**
* Contains MMIO handlers that back memory regions whose entries in the `attribute` vector is
* of type `Special`.
*/
boost::icl::interval_map<VAddr, std::set<SpecialRegion>> special_regions;
/**
* Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
* the corresponding entry in `pointers` MUST be set to null.
*/
std::vector<PageType> attributes;
const std::size_t page_size_in_bits{};
};
} // namespace Common

View File

@@ -6,12 +6,12 @@
#include "common/vector_math.h"
namespace Math {
namespace Common {
template <typename T>
class Quaternion {
public:
Math::Vec3<T> xyz;
Vec3<T> xyz;
T w{};
Quaternion<decltype(-T{})> Inverse() const {
@@ -38,12 +38,12 @@ public:
};
template <typename T>
auto QuaternionRotate(const Quaternion<T>& q, const Math::Vec3<T>& v) {
auto QuaternionRotate(const Quaternion<T>& q, const Vec3<T>& v) {
return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w);
}
inline Quaternion<float> MakeQuaternion(const Math::Vec3<float>& axis, float angle) {
inline Quaternion<float> MakeQuaternion(const Vec3<float>& axis, float angle) {
return {axis * std::sin(angle / 2), std::cos(angle / 2)};
}
} // namespace Math
} // namespace Common

View File

@@ -28,8 +28,8 @@
#include <cstring>
#include "common/common_types.h"
// GCC 4.6+
#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
// GCC
#ifdef __GNUC__
#if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN)
#define COMMON_LITTLE_ENDIAN 1
@@ -38,7 +38,7 @@
#endif
// LLVM/clang
#elif __clang__
#elif defined(__clang__)
#if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN)
#define COMMON_LITTLE_ENDIAN 1

View File

@@ -7,17 +7,17 @@
// a simple lockless thread-safe,
// single reader, single writer queue
#include <algorithm>
#include <atomic>
#include <condition_variable>
#include <cstddef>
#include <mutex>
#include "common/common_types.h"
#include <utility>
namespace Common {
template <typename T, bool NeedSize = true>
template <typename T>
class SPSCQueue {
public:
SPSCQueue() : size(0) {
SPSCQueue() {
write_ptr = read_ptr = new ElementPtr();
}
~SPSCQueue() {
@@ -25,13 +25,12 @@ public:
delete read_ptr;
}
u32 Size() const {
static_assert(NeedSize, "using Size() on FifoQueue without NeedSize");
std::size_t Size() const {
return size.load();
}
bool Empty() const {
return !read_ptr->next.load();
return Size() == 0;
}
T& Front() const {
@@ -47,13 +46,14 @@ public:
ElementPtr* new_ptr = new ElementPtr();
write_ptr->next.store(new_ptr, std::memory_order_release);
write_ptr = new_ptr;
if (NeedSize)
size++;
cv.notify_one();
++size;
}
void Pop() {
if (NeedSize)
size--;
--size;
ElementPtr* tmpptr = read_ptr;
// advance the read pointer
read_ptr = tmpptr->next.load();
@@ -66,8 +66,7 @@ public:
if (Empty())
return false;
if (NeedSize)
size--;
--size;
ElementPtr* tmpptr = read_ptr;
read_ptr = tmpptr->next.load(std::memory_order_acquire);
@@ -77,6 +76,16 @@ public:
return true;
}
T PopWait() {
if (Empty()) {
std::unique_lock<std::mutex> lock(cv_mutex);
cv.wait(lock, [this]() { return !Empty(); });
}
T t;
Pop(t);
return t;
}
// not thread-safe
void Clear() {
size.store(0);
@@ -89,7 +98,7 @@ private:
// and a pointer to the next ElementPtr
class ElementPtr {
public:
ElementPtr() : next(nullptr) {}
ElementPtr() {}
~ElementPtr() {
ElementPtr* next_ptr = next.load();
@@ -98,21 +107,23 @@ private:
}
T current;
std::atomic<ElementPtr*> next;
std::atomic<ElementPtr*> next{nullptr};
};
ElementPtr* write_ptr;
ElementPtr* read_ptr;
std::atomic<u32> size;
std::atomic_size_t size{0};
std::mutex cv_mutex;
std::condition_variable cv;
};
// a simple thread-safe,
// single reader, multiple writer queue
template <typename T, bool NeedSize = true>
template <typename T>
class MPSCQueue {
public:
u32 Size() const {
std::size_t Size() const {
return spsc_queue.Size();
}
@@ -138,13 +149,17 @@ public:
return spsc_queue.Pop(t);
}
T PopWait() {
return spsc_queue.PopWait();
}
// not thread-safe
void Clear() {
spsc_queue.Clear();
}
private:
SPSCQueue<T, NeedSize> spsc_queue;
SPSCQueue<T> spsc_queue;
std::mutex write_lock;
};
} // namespace Common

41
src/common/uint128.cpp Normal file
View File

@@ -0,0 +1,41 @@
#ifdef _MSC_VER
#include <intrin.h>
#pragma intrinsic(_umul128)
#endif
#include <cstring>
#include "common/uint128.h"
namespace Common {
u128 Multiply64Into128(u64 a, u64 b) {
u128 result;
#ifdef _MSC_VER
result[0] = _umul128(a, b, &result[1]);
#else
unsigned __int128 tmp = a;
tmp *= b;
std::memcpy(&result, &tmp, sizeof(u128));
#endif
return result;
}
std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
u64 remainder = dividend[0] % divisor;
u64 accum = dividend[0] / divisor;
if (dividend[1] == 0)
return {accum, remainder};
// We ignore dividend[1] / divisor as that overflows
const u64 first_segment = (dividend[1] % divisor) << 32;
accum += (first_segment / divisor) << 32;
const u64 second_segment = (first_segment % divisor) << 32;
accum += (second_segment / divisor);
remainder += second_segment % divisor;
if (remainder >= divisor) {
accum++;
remainder -= divisor;
}
return {accum, remainder};
}
} // namespace Common

14
src/common/uint128.h Normal file
View File

@@ -0,0 +1,14 @@
#include <utility>
#include "common/common_types.h"
namespace Common {
// This function multiplies 2 u64 values and produces a u128 value;
u128 Multiply64Into128(u64 a, u64 b);
// This function divides a u128 by a u32 value and produces two u64 values:
// the result of division and the remainder
std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor);
} // namespace Common
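A hedged usage sketch tying these helpers to the CNTPCT commits above (the constant values and function name mirror that change but are assumptions here, not a quote of the actual core_timing_util):

#include "common/common_types.h" // provides u128 (two u64 words)
#include "common/uint128.h"

// Assumed clock constants in the spirit of the CNTPCT correction above.
constexpr u64 BASE_CLOCK_RATE = 1019215872; // CPU clock, Hz
constexpr u64 CNTFREQ = 19200000;           // counter frequency, Hz

u64 CpuCyclesToClockCycles(u64 cpu_cycles) {
    // Multiply first in 128 bits so the intermediate product cannot overflow,
    // then divide back down by the CPU clock rate.
    const u128 product = Common::Multiply64Into128(cpu_cycles, CNTFREQ);
    return Common::Divide128On32(product, static_cast<u32>(BASE_CLOCK_RATE)).first;
}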

View File

@@ -33,7 +33,7 @@
#include <cmath>
#include <type_traits>
namespace Math {
namespace Common {
template <typename T>
class Vec2;
@@ -690,4 +690,4 @@ constexpr Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw) {
return MakeVec(x, yzw[0], yzw[1], yzw[2]);
}
} // namespace Math
} // namespace Common

View File

@@ -217,6 +217,7 @@ add_library(core STATIC
hle/service/audio/audren_u.h
hle/service/audio/codecctl.cpp
hle/service/audio/codecctl.h
hle/service/audio/errors.h
hle/service/audio/hwopus.cpp
hle/service/audio/hwopus.h
hle/service/bcat/bcat.cpp
@@ -400,6 +401,10 @@ add_library(core STATIC
hle/service/time/time.h
hle/service/usb/usb.cpp
hle/service/usb/usb.h
hle/service/vi/display/vi_display.cpp
hle/service/vi/display/vi_display.h
hle/service/vi/layer/vi_layer.cpp
hle/service/vi/layer/vi_layer.h
hle/service/vi/vi.cpp
hle/service/vi/vi.h
hle/service/vi/vi_m.cpp
@@ -432,8 +437,6 @@ add_library(core STATIC
loader/xci.h
memory.cpp
memory.h
memory_hook.cpp
memory_hook.h
memory_setup.h
perf_stats.cpp
perf_stats.h

View File

@@ -12,6 +12,7 @@
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/svc.h"
@@ -112,14 +113,14 @@ public:
// Always execute at least one tick.
amortized_ticks = std::max<u64>(amortized_ticks, 1);
Timing::AddTicks(amortized_ticks);
parent.core_timing.AddTicks(amortized_ticks);
num_interpreted_instructions = 0;
}
u64 GetTicksRemaining() override {
return std::max(Timing::GetDowncount(), 0);
return std::max(parent.core_timing.GetDowncount(), 0);
}
u64 GetCNTPCT() override {
return Timing::GetTicks();
return Timing::CpuCyclesToClockCycles(parent.core_timing.GetTicks());
}
ARM_Dynarmic& parent;
@@ -151,7 +152,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
config.tpidr_el0 = &cb->tpidr_el0;
config.dczid_el0 = 4;
config.ctr_el0 = 0x8444c004;
config.cntfrq_el0 = 19200000; // Value from fusee.
config.cntfrq_el0 = Timing::CNTFREQ;
// Unpredictable instructions
config.define_unpredictable_behaviour = true;
@@ -172,8 +173,10 @@ void ARM_Dynarmic::Step() {
cb->InterpreterFallback(jit->GetPC(), 1);
}
ARM_Dynarmic::ARM_Dynarmic(ExclusiveMonitor& exclusive_monitor, std::size_t core_index)
: cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), core_index{core_index},
ARM_Dynarmic::ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
std::size_t core_index)
: cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{core_timing},
core_index{core_index}, core_timing{core_timing},
exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {
ThreadContext ctx{};
inner_unicorn.SaveContext(ctx);

View File

@@ -12,10 +12,14 @@
#include "core/arm/exclusive_monitor.h"
#include "core/arm/unicorn/arm_unicorn.h"
namespace Memory {
namespace Common {
struct PageTable;
}
namespace Core::Timing {
class CoreTiming;
}
namespace Core {
class ARM_Dynarmic_Callbacks;
@@ -23,7 +27,8 @@ class DynarmicExclusiveMonitor;
class ARM_Dynarmic final : public ARM_Interface {
public:
ARM_Dynarmic(ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
std::size_t core_index);
~ARM_Dynarmic();
void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
@@ -62,9 +67,10 @@ private:
ARM_Unicorn inner_unicorn;
std::size_t core_index;
Timing::CoreTiming& core_timing;
DynarmicExclusiveMonitor& exclusive_monitor;
Memory::PageTable* current_page_table = nullptr;
Common::PageTable* current_page_table = nullptr;
};
class DynarmicExclusiveMonitor final : public ExclusiveMonitor {

View File

@@ -72,7 +72,7 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
return {};
}
ARM_Unicorn::ARM_Unicorn() {
ARM_Unicorn::ARM_Unicorn(Timing::CoreTiming& core_timing) : core_timing{core_timing} {
CHECKED(uc_open(UC_ARCH_ARM64, UC_MODE_ARM, &uc));
auto fpv = 3 << 20;
@@ -177,7 +177,7 @@ void ARM_Unicorn::Run() {
if (GDBStub::IsServerEnabled()) {
ExecuteInstructions(std::max(4000000, 0));
} else {
ExecuteInstructions(std::max(Timing::GetDowncount(), 0));
ExecuteInstructions(std::max(core_timing.GetDowncount(), 0));
}
}
@@ -190,7 +190,7 @@ MICROPROFILE_DEFINE(ARM_Jit_Unicorn, "ARM JIT", "Unicorn", MP_RGB(255, 64, 64));
void ARM_Unicorn::ExecuteInstructions(int num_instructions) {
MICROPROFILE_SCOPE(ARM_Jit_Unicorn);
CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
Timing::AddTicks(num_instructions);
core_timing.AddTicks(num_instructions);
if (GDBStub::IsServerEnabled()) {
if (last_bkpt_hit) {
uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);

View File

@@ -9,12 +9,17 @@
#include "core/arm/arm_interface.h"
#include "core/gdbstub/gdbstub.h"
namespace Core::Timing {
class CoreTiming;
}
namespace Core {
class ARM_Unicorn final : public ARM_Interface {
public:
ARM_Unicorn();
explicit ARM_Unicorn(Timing::CoreTiming& core_timing);
~ARM_Unicorn();
void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
Kernel::VMAPermission perms) override;
void UnmapMemory(VAddr address, std::size_t size) override;
@@ -43,6 +48,7 @@ public:
private:
uc_engine* uc{};
Timing::CoreTiming& core_timing;
GDBStub::BreakpointAddress last_bkpt{};
bool last_bkpt_hit;
};

View File

@@ -36,7 +36,8 @@
#include "frontend/applets/software_keyboard.h"
#include "frontend/applets/web_browser.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/gpu.h"
#include "video_core/gpu_asynch.h"
#include "video_core/gpu_synch.h"
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
@@ -78,6 +79,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
return vfs->OpenFile(path, FileSys::Mode::Read);
}
struct System::Impl {
explicit Impl(System& system) : kernel{system} {}
Cpu& CurrentCpuCore() {
return cpu_core_manager.GetCurrentCore();
@@ -94,7 +96,7 @@ struct System::Impl {
ResultStatus Init(System& system, Frontend::EmuWindow& emu_window) {
LOG_DEBUG(HW_Memory, "initialized OK");
Timing::Init();
core_timing.Initialize();
kernel.Initialize();
const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
@@ -114,13 +116,13 @@ struct System::Impl {
if (web_browser == nullptr)
web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>();
auto main_process = Kernel::Process::Create(kernel, "main");
auto main_process = Kernel::Process::Create(system, "main");
kernel.MakeCurrentProcess(main_process.get());
telemetry_session = std::make_unique<Core::TelemetrySession>();
service_manager = std::make_shared<Service::SM::ServiceManager>();
Service::Init(service_manager, *virtual_filesystem);
Service::Init(service_manager, system, *virtual_filesystem);
GDBStub::Init();
renderer = VideoCore::CreateRenderer(emu_window, system);
@@ -128,10 +130,16 @@ struct System::Impl {
return ResultStatus::ErrorVideoCore;
}
gpu_core = std::make_unique<Tegra::GPU>(renderer->Rasterizer());
is_powered_on = true;
if (Settings::values.use_asynchronous_gpu_emulation) {
gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer);
} else {
gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer);
}
cpu_core_manager.Initialize(system);
is_powered_on = true;
LOG_DEBUG(Core, "Initialized OK");
// Reset counters and set time origin to current frame
@@ -182,13 +190,13 @@ struct System::Impl {
void Shutdown() {
// Log last frame performance stats
auto perf_results = GetAndResetPerfStats();
Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
perf_results.emulation_speed * 100.0);
Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
perf_results.game_fps);
Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
perf_results.frametime * 1000.0);
const auto perf_results = GetAndResetPerfStats();
telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
perf_results.emulation_speed * 100.0);
telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
perf_results.game_fps);
telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
perf_results.frametime * 1000.0);
is_powered_on = false;
@@ -205,7 +213,7 @@ struct System::Impl {
// Shutdown kernel and core timing
kernel.Shutdown();
Timing::Shutdown();
core_timing.Shutdown();
// Close app loader
app_loader.reset();
@@ -232,9 +240,10 @@ struct System::Impl {
}
PerfStatsResults GetAndResetPerfStats() {
return perf_stats.GetAndResetStats(Timing::GetGlobalTimeUs());
return perf_stats.GetAndResetStats(core_timing.GetGlobalTimeUs());
}
Timing::CoreTiming core_timing;
Kernel::KernelCore kernel;
/// RealVfsFilesystem instance
FileSys::VirtualFilesystem virtual_filesystem;
@@ -264,7 +273,7 @@ struct System::Impl {
Core::FrameLimiter frame_limiter;
};
System::System() : impl{std::make_unique<Impl>()} {}
System::System() : impl{std::make_unique<Impl>(*this)} {}
System::~System() = default;
Cpu& System::CurrentCpuCore() {
@@ -396,6 +405,14 @@ const Kernel::KernelCore& System::Kernel() const {
return impl->kernel;
}
Timing::CoreTiming& System::CoreTiming() {
return impl->core_timing;
}
const Timing::CoreTiming& System::CoreTiming() const {
return impl->core_timing;
}
Core::PerfStats& System::GetPerfStats() {
return impl->perf_stats;
}

View File

@@ -47,6 +47,10 @@ namespace VideoCore {
class RendererBase;
} // namespace VideoCore
namespace Core::Timing {
class CoreTiming;
}
namespace Core {
class ARM_Interface;
@@ -205,6 +209,12 @@ public:
/// Provides a constant pointer to the current process.
const Kernel::Process* CurrentProcess() const;
/// Provides a reference to the core timing instance.
Timing::CoreTiming& CoreTiming();
/// Provides a constant reference to the core timing instance.
const Timing::CoreTiming& CoreTiming() const;
/// Provides a reference to the kernel instance.
Kernel::KernelCore& Kernel();
@@ -283,10 +293,6 @@ inline ARM_Interface& CurrentArmInterface() {
return System::GetInstance().CurrentArmInterface();
}
inline TelemetrySession& Telemetry() {
return System::GetInstance().TelemetrySession();
}
inline Kernel::Process* CurrentProcess() {
return System::GetInstance().CurrentProcess();
}

View File

@@ -11,6 +11,7 @@
#endif
#include "core/arm/exclusive_monitor.h"
#include "core/arm/unicorn/arm_unicorn.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_timing.h"
#include "core/hle/kernel/scheduler.h"
@@ -49,20 +50,21 @@ bool CpuBarrier::Rendezvous() {
return false;
}
Cpu::Cpu(ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, std::size_t core_index)
: cpu_barrier{cpu_barrier}, core_index{core_index} {
Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
std::size_t core_index)
: cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} {
if (Settings::values.use_cpu_jit) {
#ifdef ARCHITECTURE_x86_64
arm_interface = std::make_unique<ARM_Dynarmic>(exclusive_monitor, core_index);
arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index);
#else
arm_interface = std::make_unique<ARM_Unicorn>();
LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
#endif
} else {
arm_interface = std::make_unique<ARM_Unicorn>();
arm_interface = std::make_unique<ARM_Unicorn>(core_timing);
}
scheduler = std::make_unique<Kernel::Scheduler>(*arm_interface);
scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface);
}
Cpu::~Cpu() = default;
@@ -93,14 +95,14 @@ void Cpu::RunLoop(bool tight_loop) {
if (IsMainCore()) {
// TODO(Subv): Only let CoreTiming idle if all 4 cores are idling.
Timing::Idle();
Timing::Advance();
core_timing.Idle();
core_timing.Advance();
}
PrepareReschedule();
} else {
if (IsMainCore()) {
Timing::Advance();
core_timing.Advance();
}
if (tight_loop) {

View File

@@ -15,6 +15,14 @@ namespace Kernel {
class Scheduler;
}
namespace Core {
class System;
}
namespace Core::Timing {
class CoreTiming;
}
namespace Core {
class ARM_Interface;
@@ -41,7 +49,8 @@ private:
class Cpu {
public:
Cpu(ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, std::size_t core_index);
Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
std::size_t core_index);
~Cpu();
void RunLoop(bool tight_loop = true);
@@ -82,6 +91,7 @@ private:
std::unique_ptr<ARM_Interface> arm_interface;
CpuBarrier& cpu_barrier;
std::unique_ptr<Kernel::Scheduler> scheduler;
Timing::CoreTiming& core_timing;
std::atomic<bool> reschedule_pending = false;
std::size_t core_index;

View File

@@ -8,71 +8,60 @@
#include <mutex>
#include <string>
#include <tuple>
#include <unordered_map>
#include <vector>
#include "common/assert.h"
#include "common/thread.h"
#include "common/threadsafe_queue.h"
#include "core/core_timing_util.h"
namespace Core::Timing {
static s64 global_timer;
static int slice_length;
static int downcount;
constexpr int MAX_SLICE_LENGTH = 20000;
struct EventType {
TimedCallback callback;
const std::string* name;
};
struct Event {
struct CoreTiming::Event {
s64 time;
u64 fifo_order;
u64 userdata;
const EventType* type;
// Sort by time, unless the times are the same, in which case sort by
// the order added to the queue
friend bool operator>(const Event& left, const Event& right) {
return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
}
friend bool operator<(const Event& left, const Event& right) {
return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
}
};
// Sort by time, unless the times are the same, in which case sort by the order added to the queue
static bool operator>(const Event& left, const Event& right) {
return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
CoreTiming::CoreTiming() = default;
CoreTiming::~CoreTiming() = default;
void CoreTiming::Initialize() {
downcount = MAX_SLICE_LENGTH;
slice_length = MAX_SLICE_LENGTH;
global_timer = 0;
idled_cycles = 0;
// The time between CoreTiming being initialized and the first call to Advance() is considered
// the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before
// executing the first cycle of each slice to prepare the slice length and downcount for
// that slice.
is_global_timer_sane = true;
event_fifo_id = 0;
const auto empty_timed_callback = [](u64, s64) {};
ev_lost = RegisterEvent("_lost_event", empty_timed_callback);
}
static bool operator<(const Event& left, const Event& right) {
return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
void CoreTiming::Shutdown() {
MoveEvents();
ClearPendingEvents();
UnregisterAllEvents();
}
// unordered_map stores each element separately as a linked list node so pointers to elements
// remain stable regardless of rehashes/resizing.
static std::unordered_map<std::string, EventType> event_types;
// The queue is a min-heap using std::make_heap/push_heap/pop_heap.
// We don't use std::priority_queue because we need to be able to serialize, unserialize and
// erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't accomodated
// by the standard adaptor class.
static std::vector<Event> event_queue;
static u64 event_fifo_id;
// the queue for storing the events from other threads threadsafe until they will be added
// to the event_queue by the emu thread
static Common::MPSCQueue<Event, false> ts_queue;
// the queue for unscheduling the events from other threads threadsafe
static Common::MPSCQueue<std::pair<const EventType*, u64>, false> unschedule_queue;
constexpr int MAX_SLICE_LENGTH = 20000;
static s64 idled_cycles;
// Are we in a function that has been called from Advance()
// If events are sheduled from a function that gets called from Advance(),
// don't change slice_length and downcount.
static bool is_global_timer_sane;
static EventType* ev_lost = nullptr;
static void EmptyTimedCallback(u64 userdata, s64 cyclesLate) {}
EventType* RegisterEvent(const std::string& name, TimedCallback callback) {
EventType* CoreTiming::RegisterEvent(const std::string& name, TimedCallback callback) {
// check for existing type with same name.
// we want event type names to remain unique so that we can use them for serialization.
ASSERT_MSG(event_types.find(name) == event_types.end(),
@@ -86,71 +75,31 @@ EventType* RegisterEvent(const std::string& name, TimedCallback callback) {
return event_type;
}
void UnregisterAllEvents() {
void CoreTiming::UnregisterAllEvents() {
ASSERT_MSG(event_queue.empty(), "Cannot unregister events with events pending");
event_types.clear();
}
void Init() {
downcount = MAX_SLICE_LENGTH;
slice_length = MAX_SLICE_LENGTH;
global_timer = 0;
idled_cycles = 0;
// The time between CoreTiming being initialized and the first call to Advance() is considered
// the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before
// executing the first cycle of each slice to prepare the slice length and downcount for
// that slice.
is_global_timer_sane = true;
event_fifo_id = 0;
ev_lost = RegisterEvent("_lost_event", &EmptyTimedCallback);
}
void Shutdown() {
MoveEvents();
ClearPendingEvents();
UnregisterAllEvents();
}
// This should only be called from the CPU thread. If you are calling
// it from any other thread, you are doing something evil
u64 GetTicks() {
u64 ticks = static_cast<u64>(global_timer);
if (!is_global_timer_sane) {
ticks += slice_length - downcount;
}
return ticks;
}
void AddTicks(u64 ticks) {
downcount -= static_cast<int>(ticks);
}
u64 GetIdleTicks() {
return static_cast<u64>(idled_cycles);
}
void ClearPendingEvents() {
event_queue.clear();
}
void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
void CoreTiming::ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
ASSERT(event_type != nullptr);
s64 timeout = GetTicks() + cycles_into_future;
const s64 timeout = GetTicks() + cycles_into_future;
// If this event needs to be scheduled before the next advance(), force one early
if (!is_global_timer_sane)
if (!is_global_timer_sane) {
ForceExceptionCheck(cycles_into_future);
}
event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
}
void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
void CoreTiming::ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type,
u64 userdata) {
ts_queue.Push(Event{global_timer + cycles_into_future, 0, userdata, event_type});
}
void UnscheduleEvent(const EventType* event_type, u64 userdata) {
auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
void CoreTiming::UnscheduleEvent(const EventType* event_type, u64 userdata) {
const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
return e.type == event_type && e.userdata == userdata;
});
@@ -161,13 +110,33 @@ void UnscheduleEvent(const EventType* event_type, u64 userdata) {
}
}
void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata) {
void CoreTiming::UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata) {
unschedule_queue.Push(std::make_pair(event_type, userdata));
}
void RemoveEvent(const EventType* event_type) {
auto itr = std::remove_if(event_queue.begin(), event_queue.end(),
[&](const Event& e) { return e.type == event_type; });
u64 CoreTiming::GetTicks() const {
u64 ticks = static_cast<u64>(global_timer);
if (!is_global_timer_sane) {
ticks += slice_length - downcount;
}
return ticks;
}
u64 CoreTiming::GetIdleTicks() const {
return static_cast<u64>(idled_cycles);
}
void CoreTiming::AddTicks(u64 ticks) {
downcount -= static_cast<int>(ticks);
}
void CoreTiming::ClearPendingEvents() {
event_queue.clear();
}
void CoreTiming::RemoveEvent(const EventType* event_type) {
const auto itr = std::remove_if(event_queue.begin(), event_queue.end(),
[&](const Event& e) { return e.type == event_type; });
// Removing random items breaks the invariant so we have to re-establish it.
if (itr != event_queue.end()) {
@@ -176,22 +145,24 @@ void RemoveEvent(const EventType* event_type) {
}
}
void RemoveNormalAndThreadsafeEvent(const EventType* event_type) {
void CoreTiming::RemoveNormalAndThreadsafeEvent(const EventType* event_type) {
MoveEvents();
RemoveEvent(event_type);
}
void ForceExceptionCheck(s64 cycles) {
void CoreTiming::ForceExceptionCheck(s64 cycles) {
cycles = std::max<s64>(0, cycles);
if (downcount > cycles) {
// downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int
// here. Account for cycles already executed by adjusting slice_length
slice_length -= downcount - static_cast<int>(cycles);
downcount = static_cast<int>(cycles);
if (downcount <= cycles) {
return;
}
// downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int
// here. Account for cycles already executed by adjusting slice_length
slice_length -= downcount - static_cast<int>(cycles);
downcount = static_cast<int>(cycles);
}
void MoveEvents() {
void CoreTiming::MoveEvents() {
for (Event ev; ts_queue.Pop(ev);) {
ev.fifo_order = event_fifo_id++;
event_queue.emplace_back(std::move(ev));
@@ -199,13 +170,13 @@ void MoveEvents() {
}
}
void Advance() {
void CoreTiming::Advance() {
MoveEvents();
for (std::pair<const EventType*, u64> ev; unschedule_queue.Pop(ev);) {
UnscheduleEvent(ev.first, ev.second);
}
int cycles_executed = slice_length - downcount;
const int cycles_executed = slice_length - downcount;
global_timer += cycles_executed;
slice_length = MAX_SLICE_LENGTH;
@@ -229,16 +200,16 @@ void Advance() {
downcount = slice_length;
}
void Idle() {
void CoreTiming::Idle() {
idled_cycles += downcount;
downcount = 0;
}
std::chrono::microseconds GetGlobalTimeUs() {
std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE};
}
int GetDowncount() {
int CoreTiming::GetDowncount() const {
return downcount;
}
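
The event queue above is a std::vector kept as a min-heap via std::push_heap/std::pop_heap, ordered by (time, fifo_order), so events fire in timestamp order and ties resolve first-in-first-out. A minimal, self-contained sketch of that ordering; the schedule helper and the sample timestamps are illustrative and not part of the diff:

#include <algorithm>
#include <cstdint>
#include <functional>
#include <iostream>
#include <tuple>
#include <vector>

// Mirrors the Event ordering in the diff above: compare by time, then by FIFO id.
struct Event {
    std::int64_t time;
    std::uint64_t fifo_order;

    friend bool operator>(const Event& left, const Event& right) {
        return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
    }
};

int main() {
    std::vector<Event> queue;
    std::uint64_t fifo_id = 0;

    // Equivalent of ScheduleEvent's push: append, then restore the min-heap property.
    const auto schedule = [&](std::int64_t time) {
        queue.push_back(Event{time, fifo_id++});
        std::push_heap(queue.begin(), queue.end(), std::greater<>());
    };

    schedule(300);
    schedule(100);
    schedule(100); // same timestamp as above: popped after it, in FIFO order

    // Equivalent of the pop loop in Advance(): smallest (time, fifo_order) comes out first.
    while (!queue.empty()) {
        std::pop_heap(queue.begin(), queue.end(), std::greater<>());
        const Event next = queue.back();
        queue.pop_back();
        std::cout << next.time << " (fifo " << next.fifo_order << ")\n";
    }
    return 0;
}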

View File

@@ -4,6 +4,27 @@
#pragma once
#include <chrono>
#include <functional>
#include <string>
#include <unordered_map>
#include <vector>
#include "common/common_types.h"
#include "common/threadsafe_queue.h"
namespace Core::Timing {
/// A callback that may be scheduled for a particular core timing event.
using TimedCallback = std::function<void(u64 userdata, int cycles_late)>;
/// Contains the characteristics of a particular event.
struct EventType {
/// The event's callback function.
TimedCallback callback;
/// A pointer to the name of the event.
const std::string* name;
};
/**
* This is a system to schedule events into the emulated machine's future. Time is measured
* in main CPU clock cycles.
@@ -16,80 +37,120 @@
* inside callback:
* ScheduleEvent(periodInCycles - cyclesLate, callback, "whatever")
*/
class CoreTiming {
public:
CoreTiming();
~CoreTiming();
#include <chrono>
#include <functional>
#include <string>
#include "common/common_types.h"
CoreTiming(const CoreTiming&) = delete;
CoreTiming(CoreTiming&&) = delete;
namespace Core::Timing {
CoreTiming& operator=(const CoreTiming&) = delete;
CoreTiming& operator=(CoreTiming&&) = delete;
struct EventType;
/// CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
/// required to end slice -1 and start slice 0 before the first cycle of code is executed.
void Initialize();
using TimedCallback = std::function<void(u64 userdata, int cycles_late)>;
/// Tears down all timing related functionality.
void Shutdown();
/**
* CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
* required to end slice -1 and start slice 0 before the first cycle of code is executed.
*/
void Init();
void Shutdown();
/// Registers a core timing event with the given name and callback.
///
/// @param name The name of the core timing event to register.
/// @param callback The callback to execute for the event.
///
/// @returns An EventType instance representing the registered event.
///
/// @pre The name of the event being registered must be unique among all
/// registered events.
///
EventType* RegisterEvent(const std::string& name, TimedCallback callback);
/**
* This should only be called from the emu thread; if you are calling it from any other thread, you
* are doing something evil
*/
u64 GetTicks();
u64 GetIdleTicks();
void AddTicks(u64 ticks);
/// Unregisters all registered events thus far.
void UnregisterAllEvents();
/**
* Returns the event_type identifier. If the name is not unique, this will assert.
*/
EventType* RegisterEvent(const std::string& name, TimedCallback callback);
void UnregisterAllEvents();
/// After the first Advance, the slice lengths and the downcount will be reduced whenever an
/// event is scheduled earlier than the current values.
///
/// Scheduling from a callback will not update the downcount until the Advance() completes.
void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata = 0);
/**
* After the first Advance, the slice lengths and the downcount will be reduced whenever an event
* is scheduled earlier than the current values.
* Scheduling from a callback will not update the downcount until the Advance() completes.
*/
void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata = 0);
/// This is to be called when something outside of the HLE threads, such as the graphics thread,
/// wants to schedule things to be executed on the main thread.
///
/// @note This doesn't change slice_length and thus events scheduled by this might be
/// called with a delay of up to MAX_SLICE_LENGTH
void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type,
u64 userdata = 0);
/**
* This is to be called when something outside of the HLE threads, such as the graphics thread,
* wants to schedule things to be executed on the main thread.
* Note that this doesn't change slice_length and thus events scheduled by this might be called
* with a delay of up to MAX_SLICE_LENGTH
*/
void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata);
void UnscheduleEvent(const EventType* event_type, u64 userdata);
void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata);
void UnscheduleEvent(const EventType* event_type, u64 userdata);
void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata);
/// We only permit one event of each type in the queue at a time.
void RemoveEvent(const EventType* event_type);
void RemoveNormalAndThreadsafeEvent(const EventType* event_type);
/// We only permit one event of each type in the queue at a time.
void RemoveEvent(const EventType* event_type);
void RemoveNormalAndThreadsafeEvent(const EventType* event_type);
void ForceExceptionCheck(s64 cycles);
/** Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends
* the previous timing slice and begins the next one, you must Advance from the previous
* slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an
* Advance() is required to initialize the slice length before the first cycle of emulated
* instructions is executed.
*/
void Advance();
void MoveEvents();
/// This should only be called from the emu thread; if you are calling it from any other thread,
/// you are doing something evil
u64 GetTicks() const;
/// Pretend that the main CPU has executed enough cycles to reach the next event.
void Idle();
u64 GetIdleTicks() const;
/// Clear all pending events. This should ONLY be done on exit.
void ClearPendingEvents();
void AddTicks(u64 ticks);
void ForceExceptionCheck(s64 cycles);
/// Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends
/// the previous timing slice and begins the next one; you must Advance from the previous
/// slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an
/// Advance() is required to initialize the slice length before the first cycle of emulated
/// instructions is executed.
void Advance();
std::chrono::microseconds GetGlobalTimeUs();
/// Pretend that the main CPU has executed enough cycles to reach the next event.
void Idle();
int GetDowncount();
std::chrono::microseconds GetGlobalTimeUs() const;
int GetDowncount() const;
private:
struct Event;
/// Clear all pending events. This should ONLY be done on exit.
void ClearPendingEvents();
void MoveEvents();
s64 global_timer = 0;
s64 idled_cycles = 0;
int slice_length = 0;
int downcount = 0;
// Are we in a function that has been called from Advance()
// If events are scheduled from a function that gets called from Advance(),
// don't change slice_length and downcount.
bool is_global_timer_sane = false;
// The queue is a min-heap using std::make_heap/push_heap/pop_heap.
// We don't use std::priority_queue because we need to be able to serialize, unserialize and
// erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't
// accommodated by the standard adaptor class.
std::vector<Event> event_queue;
u64 event_fifo_id = 0;
// Stores each element separately as a linked list node so pointers to elements
// remain stable regardless of rehashes/resizing.
std::unordered_map<std::string, EventType> event_types;
// The queue for storing events from other threads in a thread-safe manner until they are added
// to the event_queue by the emu thread
Common::MPSCQueue<Event> ts_queue;
// The queue for unscheduling events from other threads in a thread-safe manner
Common::MPSCQueue<std::pair<const EventType*, u64>> unschedule_queue;
EventType* ev_lost = nullptr;
};
} // namespace Core::Timing
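
A hedged usage sketch of the member-based interface above, assuming the declarations from this header are in scope. The event name, tick period, and the locally constructed CoreTiming instance are illustrative only; in practice the instance is obtained from Core::System (see system.CoreTiming() elsewhere in this diff) rather than created ad hoc:

void ExampleUsage() {
    Core::Timing::CoreTiming core_timing;
    core_timing.Initialize();

    constexpr s64 period_in_cycles = 10000; // hypothetical period
    Core::Timing::EventType* tick_event = nullptr;

    // TimedCallback is std::function<void(u64 userdata, int cycles_late)>.
    const auto on_tick = [&](u64 userdata, int cycles_late) {
        // Re-arm from inside the callback, compensating for lateness,
        // following the pattern described in the class comment.
        core_timing.ScheduleEvent(period_in_cycles - cycles_late, tick_event, userdata);
    };

    tick_event = core_timing.RegisterEvent("ExampleTick", on_tick);
    core_timing.ScheduleEvent(period_in_cycles, tick_event);

    // Dispatcher loops call Advance() before executing the cycles of each slice.
    core_timing.Advance();

    core_timing.Shutdown();
}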

View File

@@ -7,6 +7,7 @@
#include <cinttypes>
#include <limits>
#include "common/logging/log.h"
#include "common/uint128.h"
namespace Core::Timing {
@@ -60,4 +61,9 @@ s64 nsToCycles(u64 ns) {
return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000;
}
u64 CpuCyclesToClockCycles(u64 ticks) {
const u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ);
return Common::Divide128On32(temporal, static_cast<u32>(BASE_CLOCK_RATE)).first;
}
} // namespace Core::Timing

View File

@@ -11,6 +11,7 @@ namespace Core::Timing {
// The below clock rate is based on the Switch's clock speed being widely known as 1.020GHz
// The exact value used is of course unverified.
constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked
constexpr u64 CNTFREQ = 19200000; // Value from fusee.
inline s64 msToCycles(int ms) {
// since ms is int there is no way to overflow
@@ -61,4 +62,6 @@ inline u64 cyclesToMs(s64 cycles) {
return cycles * 1000 / BASE_CLOCK_RATE;
}
u64 CpuCyclesToClockCycles(u64 ticks);
} // namespace Core::Timing
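
For illustration, a standalone sketch of what CpuCyclesToClockCycles computes: ticks * CNTFREQ / BASE_CLOCK_RATE with a 128-bit intermediate so the multiplication cannot overflow. The constants come from the header above; unsigned __int128 (a GCC/Clang extension) stands in for the Common::Multiply64Into128/Divide128On32 helpers used in the real code:

#include <cstdint>
#include <iostream>

constexpr std::uint64_t BASE_CLOCK_RATE = 1019215872; // 1020MHz un/docked, from the header above
constexpr std::uint64_t CNTFREQ = 19200000;           // value from fusee, from the header above

std::uint64_t CpuCyclesToClockCycles(std::uint64_t ticks) {
    // Widen before multiplying so ticks * CNTFREQ cannot overflow 64 bits.
    const unsigned __int128 temporal = static_cast<unsigned __int128>(ticks) * CNTFREQ;
    return static_cast<std::uint64_t>(temporal / BASE_CLOCK_RATE);
}

int main() {
    // One emulated second of CPU cycles maps to one second of counter ticks.
    std::cout << CpuCyclesToClockCycles(BASE_CLOCK_RATE) << '\n'; // prints 19200000
    return 0;
}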

View File

@@ -27,7 +27,7 @@ void CpuCoreManager::Initialize(System& system) {
exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size());
for (std::size_t index = 0; index < cores.size(); ++index) {
cores[index] = std::make_unique<Cpu>(*exclusive_monitor, *barrier, index);
cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index);
}
// Create threads for CPU cores 1-3, and build thread_to_cpu map

View File

@@ -398,7 +398,8 @@ static bool ValidCryptoRevisionString(std::string_view base, size_t begin, size_
}
void KeyManager::LoadFromFile(const std::string& filename, bool is_title_keys) {
std::ifstream file(filename);
std::ifstream file;
OpenFStream(file, filename, std::ios_base::in);
if (!file.is_open())
return;

View File

@@ -47,7 +47,7 @@ std::size_t VectorVfsFile::Write(const u8* data_, std::size_t length, std::size_
if (offset + length > data.size())
data.resize(offset + length);
const auto write = std::min(length, data.size() - offset);
std::memcpy(data.data(), data_, write);
std::memcpy(data.data() + offset, data_, write);
return write;
}

View File

@@ -67,7 +67,7 @@ static bool IsWithinTouchscreen(const Layout::FramebufferLayout& layout, unsigne
framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right);
}
std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) {
std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) const {
new_x = std::max(new_x, framebuffer_layout.screen.left);
new_x = std::min(new_x, framebuffer_layout.screen.right - 1);

View File

@@ -166,7 +166,7 @@ private:
/**
* Clip the provided coordinates to be inside the touchscreen area.
*/
std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y);
std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y) const;
};
} // namespace Core::Frontend

View File

@@ -12,12 +12,12 @@ namespace Layout {
// Finds the largest subrectangle contained in the window area that is confined to the aspect ratio
template <class T>
static MathUtil::Rectangle<T> maxRectangle(MathUtil::Rectangle<T> window_area,
float screen_aspect_ratio) {
static Common::Rectangle<T> MaxRectangle(Common::Rectangle<T> window_area,
float screen_aspect_ratio) {
float scale = std::min(static_cast<float>(window_area.GetWidth()),
window_area.GetHeight() / screen_aspect_ratio);
return MathUtil::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
static_cast<T>(std::round(scale * screen_aspect_ratio))};
return Common::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
static_cast<T>(std::round(scale * screen_aspect_ratio))};
}
FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
@@ -29,8 +29,8 @@ FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) /
ScreenUndocked::Width};
MathUtil::Rectangle<unsigned> screen_window_area{0, 0, width, height};
MathUtil::Rectangle<unsigned> screen = maxRectangle(screen_window_area, emulation_aspect_ratio);
Common::Rectangle<unsigned> screen_window_area{0, 0, width, height};
Common::Rectangle<unsigned> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio);
float window_aspect_ratio = static_cast<float>(height) / width;

View File

@@ -16,7 +16,7 @@ struct FramebufferLayout {
unsigned width{ScreenUndocked::Width};
unsigned height{ScreenUndocked::Height};
MathUtil::Rectangle<unsigned> screen;
Common::Rectangle<unsigned> screen;
/**
* Returns the ratio of pixel size of the screen, compared to the native size of the undocked

View File

@@ -124,7 +124,7 @@ using AnalogDevice = InputDevice<std::tuple<float, float>>;
* Orientation is determined by right-hand rule.
* Units: deg/sec
*/
using MotionDevice = InputDevice<std::tuple<Math::Vec3<float>, Math::Vec3<float>>>;
using MotionDevice = InputDevice<std::tuple<Common::Vec3<float>, Common::Vec3<float>>>;
/**
* A touch device is an input device that returns a tuple of two floats and a bool. The floats are

View File

@@ -4,10 +4,10 @@
#pragma once
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/swap.h"
#include "core/hle/kernel/errors.h"
#include "core/memory.h"
namespace IPC {

View File

@@ -19,9 +19,12 @@
#include "core/hle/kernel/hle_ipc.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/server_session.h"
#include "core/hle/result.h"
namespace IPC {
constexpr ResultCode ERR_REMOTE_PROCESS_DEAD{ErrorModule::HIPC, 301};
class RequestHelperBase {
protected:
Kernel::HLERequestContext* context = nullptr;
@@ -271,6 +274,20 @@ inline void ResponseBuilder::Push(u64 value) {
Push(static_cast<u32>(value >> 32));
}
template <>
inline void ResponseBuilder::Push(float value) {
u32 integral;
std::memcpy(&integral, &value, sizeof(u32));
Push(integral);
}
template <>
inline void ResponseBuilder::Push(double value) {
u64 integral;
std::memcpy(&integral, &value, sizeof(u64));
Push(integral);
}
template <>
inline void ResponseBuilder::Push(bool value) {
Push(static_cast<u8>(value));
@@ -350,7 +367,7 @@ public:
template <class T>
std::shared_ptr<T> PopIpcInterface() {
ASSERT(context->Session()->IsDomain());
ASSERT(context->GetDomainMessageHeader()->input_object_count > 0);
ASSERT(context->GetDomainMessageHeader().input_object_count > 0);
return context->GetDomainRequestHandler<T>(Pop<u32>() - 1);
}
};
@@ -362,6 +379,11 @@ inline u32 RequestParser::Pop() {
return cmdbuf[index++];
}
template <>
inline s32 RequestParser::Pop() {
return static_cast<s32>(Pop<u32>());
}
template <typename T>
void RequestParser::PopRaw(T& value) {
std::memcpy(&value, cmdbuf + index, sizeof(T));
@@ -392,11 +414,37 @@ inline u64 RequestParser::Pop() {
return msw << 32 | lsw;
}
template <>
inline s8 RequestParser::Pop() {
return static_cast<s8>(Pop<u8>());
}
template <>
inline s16 RequestParser::Pop() {
return static_cast<s16>(Pop<u16>());
}
template <>
inline s64 RequestParser::Pop() {
return static_cast<s64>(Pop<u64>());
}
template <>
inline float RequestParser::Pop() {
const u32 value = Pop<u32>();
float real;
std::memcpy(&real, &value, sizeof(real));
return real;
}
template <>
inline double RequestParser::Pop() {
const u64 value = Pop<u64>();
double real;
std::memcpy(&real, &value, sizeof(real));
return real;
}
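
The float and double specializations above rely on a memcpy-based bit copy through the 32/64-bit command-buffer words. A small self-contained illustration of that round trip in plain standard C++, separate from the ResponseBuilder/RequestParser classes themselves:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
    const float pushed = 1.5f;

    // What Push(float) does: copy the float's bits into a 32-bit command-buffer word.
    std::uint32_t word = 0;
    std::memcpy(&word, &pushed, sizeof(word));

    // What Pop<float>() does: copy the bits back out into a float.
    float popped = 0.0f;
    std::memcpy(&popped, &word, sizeof(popped));

    assert(popped == pushed); // the value round-trips bit-exactly
    return 0;
}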
template <>
inline bool RequestParser::Pop() {
return Pop<u8>() != 0;

View File

@@ -9,6 +9,7 @@
#include "common/common_types.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/hle/kernel/address_arbiter.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/process.h"
@@ -18,32 +19,171 @@
#include "core/memory.h"
namespace Kernel {
namespace AddressArbiter {
namespace {
// Wake up num_to_wake (or all) threads in a vector.
void WakeThreads(const std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
// Only process up to num_to_wake threads, unless num_to_wake is <= 0, in which case process
// them all.
std::size_t last = waiting_threads.size();
if (num_to_wake > 0) {
last = num_to_wake;
}
// Performs actual address waiting logic.
static ResultCode WaitForAddress(VAddr address, s64 timeout) {
SharedPtr<Thread> current_thread = GetCurrentThread();
// Signal the waiting threads.
for (std::size_t i = 0; i < last; i++) {
ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb);
waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
waiting_threads[i]->SetArbiterWaitAddress(0);
waiting_threads[i]->ResumeFromWait();
}
}
} // Anonymous namespace
AddressArbiter::AddressArbiter(Core::System& system) : system{system} {}
AddressArbiter::~AddressArbiter() = default;
ResultCode AddressArbiter::SignalToAddress(VAddr address, SignalType type, s32 value,
s32 num_to_wake) {
switch (type) {
case SignalType::Signal:
return SignalToAddressOnly(address, num_to_wake);
case SignalType::IncrementAndSignalIfEqual:
return IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
case SignalType::ModifyByWaitingCountAndSignalIfEqual:
return ModifyByWaitingCountAndSignalToAddressIfEqual(address, value, num_to_wake);
default:
return ERR_INVALID_ENUM_VALUE;
}
}
ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) {
const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
WakeThreads(waiting_threads, num_to_wake);
return RESULT_SUCCESS;
}
ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value,
s32 num_to_wake) {
// Ensure that we can write to the address.
if (!Memory::IsValidVirtualAddress(address)) {
return ERR_INVALID_ADDRESS_STATE;
}
if (static_cast<s32>(Memory::Read32(address)) != value) {
return ERR_INVALID_STATE;
}
Memory::Write32(address, static_cast<u32>(value + 1));
return SignalToAddressOnly(address, num_to_wake);
}
ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
s32 num_to_wake) {
// Ensure that we can write to the address.
if (!Memory::IsValidVirtualAddress(address)) {
return ERR_INVALID_ADDRESS_STATE;
}
// Get threads waiting on the address.
const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
// Determine the modified value depending on the waiting count.
s32 updated_value;
if (waiting_threads.empty()) {
updated_value = value - 1;
} else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
updated_value = value + 1;
} else {
updated_value = value;
}
if (static_cast<s32>(Memory::Read32(address)) != value) {
return ERR_INVALID_STATE;
}
Memory::Write32(address, static_cast<u32>(updated_value));
WakeThreads(waiting_threads, num_to_wake);
return RESULT_SUCCESS;
}
ResultCode AddressArbiter::WaitForAddress(VAddr address, ArbitrationType type, s32 value,
s64 timeout_ns) {
switch (type) {
case ArbitrationType::WaitIfLessThan:
return WaitForAddressIfLessThan(address, value, timeout_ns, false);
case ArbitrationType::DecrementAndWaitIfLessThan:
return WaitForAddressIfLessThan(address, value, timeout_ns, true);
case ArbitrationType::WaitIfEqual:
return WaitForAddressIfEqual(address, value, timeout_ns);
default:
return ERR_INVALID_ENUM_VALUE;
}
}
ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
bool should_decrement) {
// Ensure that we can read the address.
if (!Memory::IsValidVirtualAddress(address)) {
return ERR_INVALID_ADDRESS_STATE;
}
const s32 cur_value = static_cast<s32>(Memory::Read32(address));
if (cur_value >= value) {
return ERR_INVALID_STATE;
}
if (should_decrement) {
Memory::Write32(address, static_cast<u32>(cur_value - 1));
}
// Short-circuit without rescheduling, if timeout is zero.
if (timeout == 0) {
return RESULT_TIMEOUT;
}
return WaitForAddressImpl(address, timeout);
}
ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
// Ensure that we can read the address.
if (!Memory::IsValidVirtualAddress(address)) {
return ERR_INVALID_ADDRESS_STATE;
}
// Only wait for the address if equal.
if (static_cast<s32>(Memory::Read32(address)) != value) {
return ERR_INVALID_STATE;
}
// Short-circuit without rescheduling, if timeout is zero.
if (timeout == 0) {
return RESULT_TIMEOUT;
}
return WaitForAddressImpl(address, timeout);
}
ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread();
current_thread->SetArbiterWaitAddress(address);
current_thread->SetStatus(ThreadStatus::WaitArb);
current_thread->InvalidateWakeupCallback();
current_thread->WakeAfterDelay(timeout);
Core::System::GetInstance().CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
return RESULT_TIMEOUT;
}
// Gets the threads waiting on an address.
static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) {
const auto RetrieveWaitingThreads = [](std::size_t core_index,
std::vector<SharedPtr<Thread>>& waiting_threads,
VAddr arb_addr) {
const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
std::vector<SharedPtr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) const {
const auto RetrieveWaitingThreads = [this](std::size_t core_index,
std::vector<SharedPtr<Thread>>& waiting_threads,
VAddr arb_addr) {
const auto& scheduler = system.Scheduler(core_index);
const auto& thread_list = scheduler.GetThreadList();
for (const auto& thread : thread_list) {
if (thread->GetArbiterWaitAddress() == arb_addr)
if (thread->GetArbiterWaitAddress() == arb_addr) {
waiting_threads.push_back(thread);
}
}
};
@@ -62,119 +202,4 @@ static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address)
return threads;
}
// Wake up num_to_wake (or all) threads in a vector.
static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
// Only process up to 'target' threads, unless 'target' is <= 0, in which case process
// them all.
std::size_t last = waiting_threads.size();
if (num_to_wake > 0)
last = num_to_wake;
// Signal the waiting threads.
for (std::size_t i = 0; i < last; i++) {
ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb);
waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
waiting_threads[i]->SetArbiterWaitAddress(0);
waiting_threads[i]->ResumeFromWait();
}
}
// Signals an address being waited on.
ResultCode SignalToAddress(VAddr address, s32 num_to_wake) {
std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
WakeThreads(waiting_threads, num_to_wake);
return RESULT_SUCCESS;
}
// Signals an address being waited on and increments its value if equal to the value argument.
ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake) {
// Ensure that we can write to the address.
if (!Memory::IsValidVirtualAddress(address)) {
return ERR_INVALID_ADDRESS_STATE;
}
if (static_cast<s32>(Memory::Read32(address)) == value) {
Memory::Write32(address, static_cast<u32>(value + 1));
} else {
return ERR_INVALID_STATE;
}
return SignalToAddress(address, num_to_wake);
}
// Signals an address being waited on and modifies its value based on waiting thread count if equal
// to the value argument.
ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
s32 num_to_wake) {
// Ensure that we can write to the address.
if (!Memory::IsValidVirtualAddress(address)) {
return ERR_INVALID_ADDRESS_STATE;
}
// Get threads waiting on the address.
std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
// Determine the modified value depending on the waiting count.
s32 updated_value;
if (waiting_threads.empty()) {
updated_value = value - 1;
} else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
updated_value = value + 1;
} else {
updated_value = value;
}
if (static_cast<s32>(Memory::Read32(address)) == value) {
Memory::Write32(address, static_cast<u32>(updated_value));
} else {
return ERR_INVALID_STATE;
}
WakeThreads(waiting_threads, num_to_wake);
return RESULT_SUCCESS;
}
// Waits on an address if the value passed is less than the argument value, optionally decrementing.
ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement) {
// Ensure that we can read the address.
if (!Memory::IsValidVirtualAddress(address)) {
return ERR_INVALID_ADDRESS_STATE;
}
s32 cur_value = static_cast<s32>(Memory::Read32(address));
if (cur_value < value) {
if (should_decrement) {
Memory::Write32(address, static_cast<u32>(cur_value - 1));
}
} else {
return ERR_INVALID_STATE;
}
// Short-circuit without rescheduling, if timeout is zero.
if (timeout == 0) {
return RESULT_TIMEOUT;
}
return WaitForAddress(address, timeout);
}
// Waits on an address if the value passed is equal to the argument value.
ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
// Ensure that we can read the address.
if (!Memory::IsValidVirtualAddress(address)) {
return ERR_INVALID_ADDRESS_STATE;
}
// Only wait for the address if equal.
if (static_cast<s32>(Memory::Read32(address)) != value) {
return ERR_INVALID_STATE;
}
// Short-circuit without rescheduling, if timeout is zero.
if (timeout == 0) {
return RESULT_TIMEOUT;
}
return WaitForAddress(address, timeout);
}
} // namespace AddressArbiter
} // namespace Kernel

View File

@@ -4,31 +4,77 @@
#pragma once
#include <vector>
#include "common/common_types.h"
#include "core/hle/kernel/object.h"
union ResultCode;
namespace Core {
class System;
}
namespace Kernel {
namespace AddressArbiter {
enum class ArbitrationType {
WaitIfLessThan = 0,
DecrementAndWaitIfLessThan = 1,
WaitIfEqual = 2,
class Thread;
class AddressArbiter {
public:
enum class ArbitrationType {
WaitIfLessThan = 0,
DecrementAndWaitIfLessThan = 1,
WaitIfEqual = 2,
};
enum class SignalType {
Signal = 0,
IncrementAndSignalIfEqual = 1,
ModifyByWaitingCountAndSignalIfEqual = 2,
};
explicit AddressArbiter(Core::System& system);
~AddressArbiter();
AddressArbiter(const AddressArbiter&) = delete;
AddressArbiter& operator=(const AddressArbiter&) = delete;
AddressArbiter(AddressArbiter&&) = default;
AddressArbiter& operator=(AddressArbiter&&) = delete;
/// Signals an address being waited on with a particular signaling type.
ResultCode SignalToAddress(VAddr address, SignalType type, s32 value, s32 num_to_wake);
/// Waits on an address with a particular arbitration type.
ResultCode WaitForAddress(VAddr address, ArbitrationType type, s32 value, s64 timeout_ns);
private:
/// Signals an address being waited on.
ResultCode SignalToAddressOnly(VAddr address, s32 num_to_wake);
/// Signals an address being waited on and increments its value if equal to the value argument.
ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
/// Signals an address being waited on and modifies its value based on waiting thread count if
/// equal to the value argument.
ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
s32 num_to_wake);
/// Waits on an address if the value passed is less than the argument value,
/// optionally decrementing.
ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
bool should_decrement);
/// Waits on an address if the value passed is equal to the argument value.
ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
// Waits on the given address with a timeout in nanoseconds
ResultCode WaitForAddressImpl(VAddr address, s64 timeout);
// Gets the threads waiting on an address.
std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
Core::System& system;
};
enum class SignalType {
Signal = 0,
IncrementAndSignalIfEqual = 1,
ModifyByWaitingCountAndSignalIfEqual = 2,
};
ResultCode SignalToAddress(VAddr address, s32 num_to_wake);
ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement);
ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
} // namespace AddressArbiter
} // namespace Kernel
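
The least obvious part of the refactored arbiter is the value written back by ModifyByWaitingCountAndSignalToAddressIfEqual. A standalone restatement of that rule, for illustration only; the function name and sample values here are hypothetical:

#include <cassert>
#include <cstddef>
#include <cstdint>

// Decide the value to write back, given how many threads wait on the address
// and how many of them will be woken, matching the branches in the diff above.
std::int32_t UpdatedValue(std::size_t num_waiters, std::int32_t num_to_wake, std::int32_t value) {
    if (num_waiters == 0) {
        return value - 1; // nobody is waiting: decrement
    }
    if (num_to_wake <= 0 || num_waiters <= static_cast<std::size_t>(num_to_wake)) {
        return value + 1; // every waiter (or all requested) will be woken: increment
    }
    return value;         // more waiters remain than will be woken: leave unchanged
}

int main() {
    assert(UpdatedValue(0, 1, 10) == 9);
    assert(UpdatedValue(2, -1, 10) == 11);
    assert(UpdatedValue(5, 2, 10) == 10);
    return 0;
}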

View File

@@ -33,10 +33,11 @@ ResultVal<SharedPtr<ClientSession>> ClientPort::Connect() {
// Create a new session pair, let the created sessions inherit the parent port's HLE handler.
auto sessions = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this);
if (server_port->hle_handler)
server_port->hle_handler->ClientConnected(std::get<SharedPtr<ServerSession>>(sessions));
else
server_port->pending_sessions.push_back(std::get<SharedPtr<ServerSession>>(sessions));
if (server_port->HasHLEHandler()) {
server_port->GetHLEHandler()->ClientConnected(std::get<SharedPtr<ServerSession>>(sessions));
} else {
server_port->AppendPendingSession(std::get<SharedPtr<ServerSession>>(sessions));
}
// Wake the threads waiting on the ServerPort
server_port->WakeupAllWaitingThreads();

View File

@@ -17,21 +17,11 @@ ClientSession::~ClientSession() {
// This destructor will be called automatically when the last ClientSession handle is closed by
// the emulated application.
// Local references to ServerSession and SessionRequestHandler are necessary to guarantee they
// A local reference to the ServerSession is necessary to guarantee it
// will be kept alive until after ClientDisconnected() returns.
SharedPtr<ServerSession> server = parent->server;
if (server) {
std::shared_ptr<SessionRequestHandler> hle_handler = server->hle_handler;
if (hle_handler)
hle_handler->ClientDisconnected(server);
// TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
// their WaitSynchronization result to 0xC920181A.
// Clean up the list of client threads with pending requests, they are unneeded now that the
// client endpoint is closed.
server->pending_requesting_threads.clear();
server->currently_handling = nullptr;
server->ClientDisconnected();
}
parent->client = nullptr;

View File

@@ -36,14 +36,15 @@ public:
ResultCode SendSyncRequest(SharedPtr<Thread> thread);
std::string name; ///< Name of client port (optional)
private:
explicit ClientSession(KernelCore& kernel);
~ClientSession() override;
/// The parent session, which links to the server endpoint.
std::shared_ptr<Session> parent;
private:
explicit ClientSession(KernelCore& kernel);
~ClientSession() override;
/// Name of the client session (optional)
std::string name;
};
} // namespace Kernel

View File

@@ -14,6 +14,7 @@ constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};

View File

@@ -14,32 +14,47 @@
namespace Kernel {
namespace {
constexpr u16 GetSlot(Handle handle) {
return handle >> 15;
return static_cast<u16>(handle >> 15);
}
constexpr u16 GetGeneration(Handle handle) {
return handle & 0x7FFF;
return static_cast<u16>(handle & 0x7FFF);
}
} // Anonymous namespace
HandleTable::HandleTable() {
next_generation = 1;
Clear();
}
HandleTable::~HandleTable() = default;
ResultCode HandleTable::SetSize(s32 handle_table_size) {
if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
return ERR_OUT_OF_MEMORY;
}
// Values less than or equal to zero indicate to use the maximum allowable
// size for the handle table in the actual kernel, so we ignore the given
// value in that case, since we assume this by default unless this function
// is called.
if (handle_table_size > 0) {
table_size = static_cast<u16>(handle_table_size);
}
return RESULT_SUCCESS;
}
ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) {
DEBUG_ASSERT(obj != nullptr);
u16 slot = next_free_slot;
if (slot >= generations.size()) {
const u16 slot = next_free_slot;
if (slot >= table_size) {
LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use.");
return ERR_HANDLE_TABLE_FULL;
}
next_free_slot = generations[slot];
u16 generation = next_generation++;
const u16 generation = next_generation++;
// Overflow count so it fits in the 15 bits dedicated to the generation in the handle.
// Horizon OS uses zero to represent an invalid handle, so skip to 1.
@@ -64,10 +79,11 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
}
ResultCode HandleTable::Close(Handle handle) {
if (!IsValid(handle))
if (!IsValid(handle)) {
return ERR_INVALID_HANDLE;
}
u16 slot = GetSlot(handle);
const u16 slot = GetSlot(handle);
objects[slot] = nullptr;
@@ -77,10 +93,10 @@ ResultCode HandleTable::Close(Handle handle) {
}
bool HandleTable::IsValid(Handle handle) const {
std::size_t slot = GetSlot(handle);
u16 generation = GetGeneration(handle);
const std::size_t slot = GetSlot(handle);
const u16 generation = GetGeneration(handle);
return slot < MAX_COUNT && objects[slot] != nullptr && generations[slot] == generation;
return slot < table_size && objects[slot] != nullptr && generations[slot] == generation;
}
SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
@@ -97,7 +113,7 @@ SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
}
void HandleTable::Clear() {
for (u16 i = 0; i < MAX_COUNT; ++i) {
for (u16 i = 0; i < table_size; ++i) {
generations[i] = i + 1;
objects[i] = nullptr;
}

View File

@@ -49,6 +49,20 @@ public:
HandleTable();
~HandleTable();
/**
* Sets the number of handles that may be in use at one time
* for this handle table.
*
* @param handle_table_size The desired size to limit the handle table to.
*
* @returns an error code indicating if initialization was successful.
* If initialization was not successful, then ERR_OUT_OF_MEMORY
* will be returned.
*
* @pre handle_table_size must be within the range [0, 1024]
*/
ResultCode SetSize(s32 handle_table_size);
/**
* Allocates a handle for the given object.
* @return The created Handle or one of the following errors:
@@ -103,14 +117,21 @@ private:
*/
std::array<u16, MAX_COUNT> generations;
/**
* The limited size of the handle table. This can be specified by process
* capabilities in order to restrict the overall number of handles that
* can be created in a process instance
*/
u16 table_size = static_cast<u16>(MAX_COUNT);
/**
* Global counter of the number of created handles. Stored in `generations` when a handle is
* created, and wraps around to 1 when it hits 0x8000.
*/
u16 next_generation;
u16 next_generation = 1;
/// Head of the free slots linked list.
u16 next_free_slot;
u16 next_free_slot = 0;
};
} // namespace Kernel
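
For reference, the slot/generation split that GetSlot and GetGeneration decode earlier in this diff, shown as a standalone round trip. The packing (slot << 15 | generation, with the generation kept within 15 bits) is inferred from those helpers and the overflow comment in Create(); the sample slot and generation values are hypothetical:

#include <cassert>
#include <cstdint>

using Handle = std::uint32_t;

// Same decoding as the anonymous-namespace helpers in the diff above.
constexpr std::uint16_t GetSlot(Handle handle) {
    return static_cast<std::uint16_t>(handle >> 15);
}
constexpr std::uint16_t GetGeneration(Handle handle) {
    return static_cast<std::uint16_t>(handle & 0x7FFF);
}

int main() {
    const std::uint16_t slot = 3;        // hypothetical table slot
    const std::uint16_t generation = 42; // hypothetical generation, always < 0x8000

    const Handle handle = (static_cast<Handle>(slot) << 15) | generation;

    assert(GetSlot(handle) == slot);
    assert(GetGeneration(handle) == generation);
    return 0;
}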

View File

@@ -86,7 +86,7 @@ HLERequestContext::~HLERequestContext() = default;
void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf,
bool incoming) {
IPC::RequestParser rp(src_cmdbuf);
command_header = std::make_shared<IPC::CommandHeader>(rp.PopRaw<IPC::CommandHeader>());
command_header = rp.PopRaw<IPC::CommandHeader>();
if (command_header->type == IPC::CommandType::Close) {
// Close does not populate the rest of the IPC header
@@ -95,8 +95,7 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
// If handle descriptor is present, add size of it
if (command_header->enable_handle_descriptor) {
handle_descriptor_header =
std::make_shared<IPC::HandleDescriptorHeader>(rp.PopRaw<IPC::HandleDescriptorHeader>());
handle_descriptor_header = rp.PopRaw<IPC::HandleDescriptorHeader>();
if (handle_descriptor_header->send_current_pid) {
rp.Skip(2, false);
}
@@ -140,16 +139,15 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
// If this is an incoming message, only CommandType "Request" has a domain header
// All outgoing domain messages have the domain header, if only incoming has it
if (incoming || domain_message_header) {
domain_message_header =
std::make_shared<IPC::DomainMessageHeader>(rp.PopRaw<IPC::DomainMessageHeader>());
domain_message_header = rp.PopRaw<IPC::DomainMessageHeader>();
} else {
if (Session()->IsDomain())
if (Session()->IsDomain()) {
LOG_WARNING(IPC, "Domain request has no DomainMessageHeader!");
}
}
}
data_payload_header =
std::make_shared<IPC::DataPayloadHeader>(rp.PopRaw<IPC::DataPayloadHeader>());
data_payload_header = rp.PopRaw<IPC::DataPayloadHeader>();
data_payload_offset = rp.GetCurrentOffset();
@@ -264,11 +262,11 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
// Write the domain objects to the command buffer, these go after the raw untranslated data.
// TODO(Subv): This completely ignores C buffers.
std::size_t domain_offset = size - domain_message_header->num_objects;
auto& request_handlers = server_session->domain_request_handlers;
for (auto& object : domain_objects) {
request_handlers.emplace_back(object);
dst_cmdbuf[domain_offset++] = static_cast<u32_le>(request_handlers.size());
for (const auto& object : domain_objects) {
server_session->AppendDomainRequestHandler(object);
dst_cmdbuf[domain_offset++] =
static_cast<u32_le>(server_session->NumDomainRequestHandlers());
}
}

View File

@@ -6,6 +6,7 @@
#include <array>
#include <memory>
#include <optional>
#include <string>
#include <type_traits>
#include <vector>
@@ -15,6 +16,8 @@
#include "core/hle/ipc.h"
#include "core/hle/kernel/object.h"
union ResultCode;
namespace Service {
class ServiceFrameworkBase;
}
@@ -166,12 +169,12 @@ public:
return buffer_c_desciptors;
}
const IPC::DomainMessageHeader* GetDomainMessageHeader() const {
return domain_message_header.get();
const IPC::DomainMessageHeader& GetDomainMessageHeader() const {
return domain_message_header.value();
}
bool HasDomainMessageHeader() const {
return domain_message_header != nullptr;
return domain_message_header.has_value();
}
/// Helper function to read a buffer using the appropriate buffer descriptor
@@ -208,14 +211,12 @@ public:
template <typename T>
SharedPtr<T> GetCopyObject(std::size_t index) {
ASSERT(index < copy_objects.size());
return DynamicObjectCast<T>(copy_objects[index]);
return DynamicObjectCast<T>(copy_objects.at(index));
}
template <typename T>
SharedPtr<T> GetMoveObject(std::size_t index) {
ASSERT(index < move_objects.size());
return DynamicObjectCast<T>(move_objects[index]);
return DynamicObjectCast<T>(move_objects.at(index));
}
void AddMoveObject(SharedPtr<Object> object) {
@@ -232,7 +233,7 @@ public:
template <typename T>
std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const {
return std::static_pointer_cast<T>(domain_request_handlers[index]);
return std::static_pointer_cast<T>(domain_request_handlers.at(index));
}
void SetDomainRequestHandlers(
@@ -272,10 +273,10 @@ private:
boost::container::small_vector<SharedPtr<Object>, 8> copy_objects;
boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects;
std::shared_ptr<IPC::CommandHeader> command_header;
std::shared_ptr<IPC::HandleDescriptorHeader> handle_descriptor_header;
std::shared_ptr<IPC::DataPayloadHeader> data_payload_header;
std::shared_ptr<IPC::DomainMessageHeader> domain_message_header;
std::optional<IPC::CommandHeader> command_header;
std::optional<IPC::HandleDescriptorHeader> handle_descriptor_header;
std::optional<IPC::DataPayloadHeader> data_payload_header;
std::optional<IPC::DomainMessageHeader> domain_message_header;
std::vector<IPC::BufferDescriptorX> buffer_x_desciptors;
std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors;
std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors;

View File

@@ -12,6 +12,7 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/hle/kernel/address_arbiter.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/kernel.h"
@@ -86,6 +87,8 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
}
struct KernelCore::Impl {
explicit Impl(Core::System& system) : system{system} {}
void Initialize(KernelCore& kernel) {
Shutdown();
@@ -124,7 +127,7 @@ struct KernelCore::Impl {
void InitializeThreads() {
thread_wakeup_event_type =
Core::Timing::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
system.CoreTiming().RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
}
std::atomic<u32> next_object_id{0};
@@ -145,9 +148,12 @@ struct KernelCore::Impl {
/// Map of named ports managed by the kernel, which can be retrieved using
/// the ConnectToPort SVC.
NamedPortTable named_ports;
// System context
Core::System& system;
};
KernelCore::KernelCore() : impl{std::make_unique<Impl>()} {}
KernelCore::KernelCore(Core::System& system) : impl{std::make_unique<Impl>(system)} {}
KernelCore::~KernelCore() {
Shutdown();
}

View File

@@ -11,12 +11,18 @@
template <typename T>
class ResultVal;
namespace Core::Timing {
struct EventType;
namespace Core {
class System;
}
namespace Core::Timing {
class CoreTiming;
struct EventType;
} // namespace Core::Timing
namespace Kernel {
class AddressArbiter;
class ClientPort;
class HandleTable;
class Process;
@@ -29,7 +35,14 @@ private:
using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>;
public:
KernelCore();
/// Constructs an instance of the kernel using the given System
/// instance as a context for any necessary system-related state,
/// such as threads, CPU core state, etc.
///
/// @post After execution of the constructor, the provided System
/// object *must* outlive the kernel instance itself.
///
explicit KernelCore(Core::System& system);
~KernelCore();
KernelCore(const KernelCore&) = delete;

View File

@@ -31,7 +31,7 @@ namespace {
*/
void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) {
// Setup page table so we can write to memory
SetCurrentPageTable(&owner_process.VMManager().page_table);
Memory::SetCurrentPageTable(&owner_process.VMManager().page_table);
// Initialize new "main" thread
const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress();
@@ -53,9 +53,10 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_poi
CodeSet::CodeSet() = default;
CodeSet::~CodeSet() = default;
SharedPtr<Process> Process::Create(KernelCore& kernel, std::string&& name) {
SharedPtr<Process> process(new Process(kernel));
SharedPtr<Process> Process::Create(Core::System& system, std::string&& name) {
auto& kernel = system.Kernel();
SharedPtr<Process> process(new Process(system));
process->name = std::move(name);
process->resource_limit = kernel.GetSystemResourceLimit();
process->status = ProcessStatus::Created;
@@ -99,7 +100,13 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
vm_manager.Reset(metadata.GetAddressSpaceType());
const auto& caps = metadata.GetKernelCapabilities();
return capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
const auto capability_init_result =
capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
if (capability_init_result.IsError()) {
return capability_init_result;
}
return handle_table.SetSize(capabilities.GetHandleTableSize());
}
void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) {
@@ -126,7 +133,7 @@ void Process::PrepareForTermination() {
if (thread->GetOwnerProcess() != this)
continue;
if (thread == GetCurrentThread())
if (thread == system.CurrentScheduler().GetCurrentThread())
continue;
// TODO(Subv): When are the other running/ready threads terminated?
@@ -138,7 +145,6 @@ void Process::PrepareForTermination() {
}
};
const auto& system = Core::System::GetInstance();
stop_threads(system.Scheduler(0).GetThreadList());
stop_threads(system.Scheduler(1).GetThreadList());
stop_threads(system.Scheduler(2).GetThreadList());
@@ -221,14 +227,12 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeMutable);
// Clear instruction cache in CPU JIT
Core::System::GetInstance().ArmInterface(0).ClearInstructionCache();
Core::System::GetInstance().ArmInterface(1).ClearInstructionCache();
Core::System::GetInstance().ArmInterface(2).ClearInstructionCache();
Core::System::GetInstance().ArmInterface(3).ClearInstructionCache();
system.InvalidateCpuInstructionCaches();
}
Kernel::Process::Process(KernelCore& kernel) : WaitObject{kernel} {}
Kernel::Process::~Process() {}
Process::Process(Core::System& system)
: WaitObject{system.Kernel()}, address_arbiter{system}, system{system} {}
Process::~Process() = default;
void Process::Acquire(Thread* thread) {
ASSERT_MSG(!ShouldWait(thread), "Object unavailable!");

View File

@@ -12,12 +12,17 @@
#include <vector>
#include <boost/container/static_vector.hpp>
#include "common/common_types.h"
#include "core/hle/kernel/address_arbiter.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/process_capability.h"
#include "core/hle/kernel/vm_manager.h"
#include "core/hle/kernel/wait_object.h"
#include "core/hle/result.h"
namespace Core {
class System;
}
namespace FileSys {
class ProgramMetadata;
}
@@ -116,7 +121,7 @@ public:
static constexpr std::size_t RANDOM_ENTROPY_SIZE = 4;
static SharedPtr<Process> Create(KernelCore& kernel, std::string&& name);
static SharedPtr<Process> Create(Core::System& system, std::string&& name);
std::string GetTypeName() const override {
return "Process";
@@ -150,6 +155,16 @@ public:
return handle_table;
}
/// Gets a reference to the process' address arbiter.
AddressArbiter& GetAddressArbiter() {
return address_arbiter;
}
/// Gets a const reference to the process' address arbiter.
const AddressArbiter& GetAddressArbiter() const {
return address_arbiter;
}
/// Gets the current status of the process
ProcessStatus GetStatus() const {
return status;
@@ -251,7 +266,7 @@ public:
void FreeTLSSlot(VAddr tls_address);
private:
explicit Process(KernelCore& kernel);
explicit Process(Core::System& system);
~Process() override;
/// Checks if the specified thread should wait until this process is available.
@@ -309,9 +324,16 @@ private:
/// Per-process handle table for storing created object handles in.
HandleTable handle_table;
/// Per-process address arbiter.
AddressArbiter address_arbiter;
/// Random values for svcGetInfo RandomEntropy
std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy;
/// System context
Core::System& system;
/// Name of this process
std::string name;
};

View File

@@ -96,7 +96,7 @@ void ProcessCapabilities::InitializeForMetadatalessProcess() {
interrupt_capabilities.set();
// Allow using the maximum possible amount of handles
handle_table_size = static_cast<u32>(HandleTable::MAX_COUNT);
handle_table_size = static_cast<s32>(HandleTable::MAX_COUNT);
// Allow all debugging capabilities.
is_debuggable = true;
@@ -337,7 +337,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
return ERR_RESERVED_VALUE;
}
handle_table_size = (flags >> 16) & 0x3FF;
handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF);
return RESULT_SUCCESS;
}

View File

@@ -156,7 +156,7 @@ public:
}
/// Gets the number of total allowable handles for the process' handle table.
u32 GetHandleTableSize() const {
s32 GetHandleTableSize() const {
return handle_table_size;
}
@@ -252,7 +252,7 @@ private:
u64 core_mask = 0;
u64 priority_mask = 0;
u32 handle_table_size = 0;
s32 handle_table_size = 0;
u32 kernel_version = 0;
ProgramType program_type = ProgramType::SysModule;

View File

@@ -19,7 +19,8 @@ namespace Kernel {
std::mutex Scheduler::scheduler_mutex;
Scheduler::Scheduler(Core::ARM_Interface& cpu_core) : cpu_core(cpu_core) {}
Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core)
: cpu_core{cpu_core}, system{system} {}
Scheduler::~Scheduler() {
for (auto& thread : thread_list) {
@@ -61,7 +62,7 @@ Thread* Scheduler::PopNextReadyThread() {
void Scheduler::SwitchContext(Thread* new_thread) {
Thread* const previous_thread = GetCurrentThread();
Process* const previous_process = Core::CurrentProcess();
Process* const previous_process = system.Kernel().CurrentProcess();
UpdateLastContextSwitchTime(previous_thread, previous_process);
@@ -94,8 +95,8 @@ void Scheduler::SwitchContext(Thread* new_thread) {
auto* const thread_owner_process = current_thread->GetOwnerProcess();
if (previous_process != thread_owner_process) {
Core::System::GetInstance().Kernel().MakeCurrentProcess(thread_owner_process);
SetCurrentPageTable(&Core::CurrentProcess()->VMManager().page_table);
system.Kernel().MakeCurrentProcess(thread_owner_process);
Memory::SetCurrentPageTable(&thread_owner_process->VMManager().page_table);
}
cpu_core.LoadContext(new_thread->GetContext());
@@ -111,7 +112,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
const u64 prev_switch_ticks = last_context_switch_time;
const u64 most_recent_switch_ticks = Core::Timing::GetTicks();
const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks();
const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
if (thread != nullptr) {
@@ -198,8 +199,7 @@ void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
ASSERT(thread->GetPriority() < THREADPRIO_COUNT);
// Yield this thread -- sleep for zero time and force reschedule to different thread
WaitCurrentThread_Sleep();
GetCurrentThread()->WakeAfterDelay(0);
GetCurrentThread()->Sleep(0);
}
void Scheduler::YieldWithLoadBalancing(Thread* thread) {
@@ -214,8 +214,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
ASSERT(priority < THREADPRIO_COUNT);
// Sleep for zero time to be able to force reschedule to different thread
WaitCurrentThread_Sleep();
GetCurrentThread()->WakeAfterDelay(0);
GetCurrentThread()->Sleep(0);
Thread* suggested_thread = nullptr;
@@ -223,8 +222,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
// Take the first non-nullptr one
for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) {
const auto res =
Core::System::GetInstance().CpuCore(cur_core).Scheduler().GetNextSuggestedThread(
core, priority);
system.CpuCore(cur_core).Scheduler().GetNextSuggestedThread(core, priority);
// If scheduler provides a suggested thread
if (res != nullptr) {

View File

@@ -13,7 +13,8 @@
namespace Core {
class ARM_Interface;
}
class System;
} // namespace Core
namespace Kernel {
@@ -21,7 +22,7 @@ class Process;
class Scheduler final {
public:
explicit Scheduler(Core::ARM_Interface& cpu_core);
explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core);
~Scheduler();
/// Returns whether there are any threads that are ready to run.
@@ -162,6 +163,7 @@ private:
Core::ARM_Interface& cpu_core;
u64 last_context_switch_time = 0;
Core::System& system;
static std::mutex scheduler_mutex;
};
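The Scheduler changes above trade Core::System::GetInstance() lookups for a Core::System& reference that is injected through the constructor and stored as a member. A minimal sketch of that dependency-injection shape, using hypothetical Timer and Worker types rather than the real Core::System and Scheduler:

// Illustrative only: hypothetical Timer/Worker types, not yuzu classes.
#include <cstdint>
#include <iostream>

class Timer {
public:
    std::uint64_t GetTicks() const {
        return ticks;
    }
    void AddTicks(std::uint64_t amount) {
        ticks += amount;
    }

private:
    std::uint64_t ticks = 0;
};

class Worker {
public:
    // The dependency is handed over once and stored as a reference, so no
    // member function needs to reach for a global GetInstance()-style accessor.
    explicit Worker(Timer& timer) : timer{timer} {}

    void Step() {
        timer.AddTicks(400);
        std::cout << "ticks=" << timer.GetTicks() << '\n';
    }

private:
    Timer& timer;
};

int main() {
    Timer timer;
    Worker worker{timer};
    worker.Step();
    worker.Step();
}

Constructing Worker with the Timer it should use makes the coupling explicit and lets tests substitute a fake, which is the same benefit the Scheduler gains from storing Core::System&.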

View File

@@ -26,6 +26,10 @@ ResultVal<SharedPtr<ServerSession>> ServerPort::Accept() {
return MakeResult(std::move(session));
}
void ServerPort::AppendPendingSession(SharedPtr<ServerSession> pending_session) {
pending_sessions.push_back(std::move(pending_session));
}
bool ServerPort::ShouldWait(Thread* thread) const {
// If there are no pending sessions, we wait until a new one is added.
return pending_sessions.empty();

View File

@@ -22,6 +22,8 @@ class SessionRequestHandler;
class ServerPort final : public WaitObject {
public:
using HLEHandler = std::shared_ptr<SessionRequestHandler>;
/**
* Creates a pair of ServerPort and an associated ClientPort.
*
@@ -51,22 +53,27 @@ public:
*/
ResultVal<SharedPtr<ServerSession>> Accept();
/// Whether or not this server port has an HLE handler available.
bool HasHLEHandler() const {
return hle_handler != nullptr;
}
/// Gets the HLE handler for this port.
HLEHandler GetHLEHandler() const {
return hle_handler;
}
/**
* Sets the HLE handler template for the port. ServerSessions created by connecting to this port
* will inherit a reference to this handler.
*/
void SetHleHandler(std::shared_ptr<SessionRequestHandler> hle_handler_) {
void SetHleHandler(HLEHandler hle_handler_) {
hle_handler = std::move(hle_handler_);
}
std::string name; ///< Name of port (optional)
/// ServerSessions waiting to be accepted by the port
std::vector<SharedPtr<ServerSession>> pending_sessions;
/// This session's HLE request handler template (optional)
/// ServerSessions created from this port inherit a reference to this handler.
std::shared_ptr<SessionRequestHandler> hle_handler;
/// Appends a ServerSession to the collection of ServerSessions
/// waiting to be accepted by this port.
void AppendPendingSession(SharedPtr<ServerSession> pending_session);
bool ShouldWait(Thread* thread) const override;
void Acquire(Thread* thread) override;
@@ -74,6 +81,16 @@ public:
private:
explicit ServerPort(KernelCore& kernel);
~ServerPort() override;
/// ServerSessions waiting to be accepted by the port
std::vector<SharedPtr<ServerSession>> pending_sessions;
/// This session's HLE request handler template (optional)
/// ServerSessions created from this port inherit a reference to this handler.
HLEHandler hle_handler;
/// Name of the port (optional)
std::string name;
};
} // namespace Kernel

View File

@@ -63,42 +63,71 @@ void ServerSession::Acquire(Thread* thread) {
pending_requesting_threads.pop_back();
}
ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) {
auto* const domain_message_header = context.GetDomainMessageHeader();
if (domain_message_header) {
// Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs
context.SetDomainRequestHandlers(domain_request_handlers);
// If there is a DomainMessageHeader, then this is CommandType "Request"
const u32 object_id{context.GetDomainMessageHeader()->object_id};
switch (domain_message_header->command) {
case IPC::DomainMessageHeader::CommandType::SendMessage:
if (object_id > domain_request_handlers.size()) {
LOG_CRITICAL(IPC,
"object_id {} is too big! This probably means a recent service call "
"to {} needed to return a new interface!",
object_id, name);
UNREACHABLE();
return RESULT_SUCCESS; // Ignore error if asserts are off
}
return domain_request_handlers[object_id - 1]->HandleSyncRequest(context);
case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
domain_request_handlers[object_id - 1] = nullptr;
IPC::ResponseBuilder rb{context, 2};
rb.Push(RESULT_SUCCESS);
return RESULT_SUCCESS;
}
}
LOG_CRITICAL(IPC, "Unknown domain command={}",
static_cast<int>(domain_message_header->command.Value()));
ASSERT(false);
void ServerSession::ClientDisconnected() {
// We keep a shared pointer to the hle handler to keep it alive throughout
// the call to ClientDisconnected, as ClientDisconnected invalidates the
// hle_handler member itself during the course of the function executing.
std::shared_ptr<SessionRequestHandler> handler = hle_handler;
if (handler) {
// Note that after this returns, this server session's hle_handler is
// invalidated (set to null).
handler->ClientDisconnected(this);
}
// TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
// their WaitSynchronization result to 0xC920181A.
// Clean up the list of client threads with pending requests; they are unneeded now that the
// client endpoint is closed.
pending_requesting_threads.clear();
currently_handling = nullptr;
}
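The lifetime note inside ClientDisconnected above is a general shared_ptr idiom: copy the member into a local before invoking a callback that may clear that member, so the pointee outlives the call. A small self-contained sketch of the same idiom, with hypothetical Handler and Session types in place of the kernel classes:

#include <iostream>
#include <memory>

struct Session;

struct Handler {
    void ClientDisconnected(Session& session);
    ~Handler() {
        std::cout << "handler destroyed\n";
    }
};

struct Session {
    std::shared_ptr<Handler> hle_handler = std::make_shared<Handler>();

    void OnClientDisconnected() {
        // Keep the handler alive for the whole call, even though the callback
        // below clears the member this local copy was taken from.
        std::shared_ptr<Handler> handler = hle_handler;
        if (handler) {
            handler->ClientDisconnected(*this);
        }
        // The local copy goes out of scope here; only now can the Handler die.
    }
};

void Handler::ClientDisconnected(Session& session) {
    // Drop the session's owning reference mid-call; the caller's local copy
    // still keeps this Handler alive until it returns.
    session.hle_handler.reset();
    std::cout << "client disconnected\n";
}

int main() {
    Session session;
    session.OnClientDisconnected();
}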
void ServerSession::AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler) {
domain_request_handlers.push_back(std::move(handler));
}
std::size_t ServerSession::NumDomainRequestHandlers() const {
return domain_request_handlers.size();
}
ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) {
if (!context.HasDomainMessageHeader()) {
return RESULT_SUCCESS;
}
// Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs
context.SetDomainRequestHandlers(domain_request_handlers);
// If there is a DomainMessageHeader, then this is CommandType "Request"
const auto& domain_message_header = context.GetDomainMessageHeader();
const u32 object_id{domain_message_header.object_id};
switch (domain_message_header.command) {
case IPC::DomainMessageHeader::CommandType::SendMessage:
if (object_id > domain_request_handlers.size()) {
LOG_CRITICAL(IPC,
"object_id {} is too big! This probably means a recent service call "
"to {} needed to return a new interface!",
object_id, name);
UNREACHABLE();
return RESULT_SUCCESS; // Ignore error if asserts are off
}
return domain_request_handlers[object_id - 1]->HandleSyncRequest(context);
case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
domain_request_handlers[object_id - 1] = nullptr;
IPC::ResponseBuilder rb{context, 2};
rb.Push(RESULT_SUCCESS);
return RESULT_SUCCESS;
}
}
LOG_CRITICAL(IPC, "Unknown domain command={}",
static_cast<int>(domain_message_header.command.Value()));
ASSERT(false);
return RESULT_SUCCESS;
}

View File

@@ -46,6 +46,14 @@ public:
return HANDLE_TYPE;
}
Session* GetParent() {
return parent.get();
}
const Session* GetParent() const {
return parent.get();
}
using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>;
/**
@@ -78,23 +86,16 @@ public:
void Acquire(Thread* thread) override;
std::string name; ///< The name of this session (optional)
std::shared_ptr<Session> parent; ///< The parent session, which links to the client endpoint.
std::shared_ptr<SessionRequestHandler>
hle_handler; ///< This session's HLE request handler (applicable when not a domain)
/// Called when a client disconnection occurs.
void ClientDisconnected();
/// This is the list of domain request handlers (after conversion to a domain)
std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers;
/// Adds a new domain request handler to the collection of request handlers within
/// this ServerSession instance.
void AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler);
/// List of threads that are pending a response after a sync request. This list is processed in
/// a LIFO manner, thus, the last request will be dispatched first.
/// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
std::vector<SharedPtr<Thread>> pending_requesting_threads;
/// Thread whose request is currently being handled. A request is considered "handled" when a
/// response is sent via svcReplyAndReceive.
/// TODO(Subv): Find a better name for this.
SharedPtr<Thread> currently_handling;
/// Retrieves the total number of domain request handlers that have been
/// appended to this ServerSession instance.
std::size_t NumDomainRequestHandlers() const;
/// Returns true if the session has been converted to a domain, otherwise false.
bool IsDomain() const {
@@ -129,8 +130,30 @@ private:
/// object handle.
ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context);
/// The parent session, which links to the client endpoint.
std::shared_ptr<Session> parent;
/// This session's HLE request handler (applicable when not a domain)
std::shared_ptr<SessionRequestHandler> hle_handler;
/// This is the list of domain request handlers (after conversion to a domain)
std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers;
/// List of threads that are pending a response after a sync request. This list is processed in
/// a LIFO manner, thus, the last request will be dispatched first.
/// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test.
std::vector<SharedPtr<Thread>> pending_requesting_threads;
/// Thread whose request is currently being handled. A request is considered "handled" when a
/// response is sent via svcReplyAndReceive.
/// TODO(Subv): Find a better name for this.
SharedPtr<Thread> currently_handling;
/// When set to true, converts the session to a domain at the end of the command
bool convert_to_domain{};
/// The name of this session (optional)
std::string name;
};
} // namespace Kernel

View File

@@ -6,7 +6,6 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/shared_memory.h"
@@ -34,8 +33,8 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
shared_memory->backing_block_offset = 0;
// Refresh the address mappings for the current process.
if (Core::CurrentProcess() != nullptr) {
Core::CurrentProcess()->VMManager().RefreshMemoryBlockMappings(
if (kernel.CurrentProcess() != nullptr) {
kernel.CurrentProcess()->VMManager().RefreshMemoryBlockMappings(
shared_memory->backing_block.get());
}
} else {

View File

@@ -20,6 +20,7 @@
#include "core/hle/kernel/address_arbiter.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/client_session.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/mutex.h"
@@ -47,23 +48,6 @@ constexpr bool IsValidAddressRange(VAddr address, u64 size) {
return address + size > address;
}
// Checks if a given address range lies within a larger address range.
constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
VAddr address_range_end) {
const VAddr end_address = address + size - 1;
return address_range_begin <= address && end_address <= address_range_end - 1;
}
bool IsInsideAddressSpace(const VMManager& vm, VAddr address, u64 size) {
return IsInsideAddressRange(address, size, vm.GetAddressSpaceBaseAddress(),
vm.GetAddressSpaceEndAddress());
}
bool IsInsideNewMapRegion(const VMManager& vm, VAddr address, u64 size) {
return IsInsideAddressRange(address, size, vm.GetNewMapRegionBaseAddress(),
vm.GetNewMapRegionEndAddress());
}
// 8 GiB
constexpr u64 MAIN_MEMORY_SIZE = 0x200000000;
@@ -105,14 +89,14 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add
return ERR_INVALID_ADDRESS_STATE;
}
if (!IsInsideAddressSpace(vm_manager, src_addr, size)) {
if (!vm_manager.IsWithinAddressSpace(src_addr, size)) {
LOG_ERROR(Kernel_SVC,
"Source is not within the address space, addr=0x{:016X}, size=0x{:016X}",
src_addr, size);
return ERR_INVALID_ADDRESS_STATE;
}
if (!IsInsideNewMapRegion(vm_manager, dst_addr, size)) {
if (!vm_manager.IsWithinNewMapRegion(dst_addr, size)) {
LOG_ERROR(Kernel_SVC,
"Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}",
dst_addr, size);
@@ -238,7 +222,7 @@ static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) {
auto* const current_process = Core::CurrentProcess();
auto& vm_manager = current_process->VMManager();
if (!IsInsideAddressSpace(vm_manager, addr, size)) {
if (!vm_manager.IsWithinAddressSpace(addr, size)) {
LOG_ERROR(Kernel_SVC,
"Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
size);
@@ -299,7 +283,7 @@ static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attr
}
auto& vm_manager = Core::CurrentProcess()->VMManager();
if (!IsInsideAddressSpace(vm_manager, address, size)) {
if (!vm_manager.IsWithinAddressSpace(address, size)) {
LOG_ERROR(Kernel_SVC,
"Given address (0x{:016X}) is outside the bounds of the address space.", address);
return ERR_INVALID_ADDRESS_STATE;
@@ -918,6 +902,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
}
const auto& system = Core::System::GetInstance();
const auto& core_timing = system.CoreTiming();
const auto& scheduler = system.CurrentScheduler();
const auto* const current_thread = scheduler.GetCurrentThread();
const bool same_thread = current_thread == thread;
@@ -927,9 +912,9 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) {
const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks();
out_ticks = thread_ticks + (Core::Timing::GetTicks() - prev_ctx_ticks);
out_ticks = thread_ticks + (core_timing.GetTicks() - prev_ctx_ticks);
} else if (same_thread && info_sub_id == system.CurrentCoreIndex()) {
out_ticks = Core::Timing::GetTicks() - prev_ctx_ticks;
out_ticks = core_timing.GetTicks() - prev_ctx_ticks;
}
*result = out_ticks;
@@ -1299,10 +1284,14 @@ static ResultCode StartThread(Handle thread_handle) {
/// Called when a thread exits
static void ExitThread() {
LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", Core::CurrentArmInterface().GetPC());
auto& system = Core::System::GetInstance();
ExitCurrentThread();
Core::System::GetInstance().PrepareReschedule();
LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
current_thread->Stop();
system.CurrentScheduler().RemoveThread(current_thread);
system.PrepareReschedule();
}
/// Sleep the current thread
@@ -1315,32 +1304,32 @@ static void SleepThread(s64 nanoseconds) {
YieldAndWaitForLoadBalancing = -2,
};
auto& system = Core::System::GetInstance();
auto& scheduler = system.CurrentScheduler();
auto* const current_thread = scheduler.GetCurrentThread();
if (nanoseconds <= 0) {
auto& scheduler{Core::System::GetInstance().CurrentScheduler()};
switch (static_cast<SleepType>(nanoseconds)) {
case SleepType::YieldWithoutLoadBalancing:
scheduler.YieldWithoutLoadBalancing(GetCurrentThread());
scheduler.YieldWithoutLoadBalancing(current_thread);
break;
case SleepType::YieldWithLoadBalancing:
scheduler.YieldWithLoadBalancing(GetCurrentThread());
scheduler.YieldWithLoadBalancing(current_thread);
break;
case SleepType::YieldAndWaitForLoadBalancing:
scheduler.YieldAndWaitForLoadBalancing(GetCurrentThread());
scheduler.YieldAndWaitForLoadBalancing(current_thread);
break;
default:
UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds);
}
} else {
// Sleep current thread and check for next thread to schedule
WaitCurrentThread_Sleep();
// Create an event to wake the thread up after the specified nanosecond delay has passed
GetCurrentThread()->WakeAfterDelay(nanoseconds);
current_thread->Sleep(nanoseconds);
}
// Reschedule all CPU cores
for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i)
Core::System::GetInstance().CpuCore(i).PrepareReschedule();
for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i) {
system.CpuCore(i).PrepareReschedule();
}
}
/// Wait process wide key atomic
@@ -1494,20 +1483,10 @@ static ResultCode WaitForAddress(VAddr address, u32 type, s32 value, s64 timeout
return ERR_INVALID_ADDRESS;
}
switch (static_cast<AddressArbiter::ArbitrationType>(type)) {
case AddressArbiter::ArbitrationType::WaitIfLessThan:
return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, false);
case AddressArbiter::ArbitrationType::DecrementAndWaitIfLessThan:
return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, true);
case AddressArbiter::ArbitrationType::WaitIfEqual:
return AddressArbiter::WaitForAddressIfEqual(address, value, timeout);
default:
LOG_ERROR(Kernel_SVC,
"Invalid arbitration type, expected WaitIfLessThan, DecrementAndWaitIfLessThan "
"or WaitIfEqual but got {}",
type);
return ERR_INVALID_ENUM_VALUE;
}
const auto arbitration_type = static_cast<AddressArbiter::ArbitrationType>(type);
auto& address_arbiter =
Core::System::GetInstance().Kernel().CurrentProcess()->GetAddressArbiter();
return address_arbiter.WaitForAddress(address, arbitration_type, value, timeout);
}
// Signals to an address (via Address Arbiter)
@@ -1525,31 +1504,21 @@ static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to
return ERR_INVALID_ADDRESS;
}
switch (static_cast<AddressArbiter::SignalType>(type)) {
case AddressArbiter::SignalType::Signal:
return AddressArbiter::SignalToAddress(address, num_to_wake);
case AddressArbiter::SignalType::IncrementAndSignalIfEqual:
return AddressArbiter::IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
case AddressArbiter::SignalType::ModifyByWaitingCountAndSignalIfEqual:
return AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(address, value,
num_to_wake);
default:
LOG_ERROR(Kernel_SVC,
"Invalid signal type, expected Signal, IncrementAndSignalIfEqual "
"or ModifyByWaitingCountAndSignalIfEqual but got {}",
type);
return ERR_INVALID_ENUM_VALUE;
}
const auto signal_type = static_cast<AddressArbiter::SignalType>(type);
auto& address_arbiter =
Core::System::GetInstance().Kernel().CurrentProcess()->GetAddressArbiter();
return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake);
}
/// This returns the total CPU ticks elapsed since the CPU was powered-on
static u64 GetSystemTick() {
LOG_TRACE(Kernel_SVC, "called");
const u64 result{Core::Timing::GetTicks()};
auto& core_timing = Core::System::GetInstance().CoreTiming();
const u64 result{core_timing.GetTicks()};
// Advance time to defeat dumb games that busy-wait for the frame to end.
Core::Timing::AddTicks(400);
core_timing.AddTicks(400);
return result;
}
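GetSystemTick above adds 400 ticks on every query precisely so that guest code busy-waiting on the tick counter keeps observing progress. A toy illustration of that reasoning, with a hypothetical FakeClock standing in for the core timing:

#include <cstdint>
#include <iostream>

// Hypothetical stand-in for the core timing source.
class FakeClock {
public:
    std::uint64_t GetTicks() {
        // Mirror the trick in GetSystemTick: every query also advances time,
        // so a caller polling in a tight loop always makes progress.
        ticks += 400;
        return ticks;
    }

private:
    std::uint64_t ticks = 0;
};

int main() {
    FakeClock clock;
    const std::uint64_t start = clock.GetTicks();

    // Terminates only because each GetTicks() call adds ticks; with a clock
    // frozen between polls this loop would spin forever.
    while (clock.GetTicks() - start < 10000) {
    }
    std::cout << "waited past 10000 ticks\n";
}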

View File

@@ -7,8 +7,6 @@
#include <optional>
#include <vector>
#include <boost/range/algorithm_ext/erase.hpp>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
@@ -43,7 +41,8 @@ Thread::~Thread() = default;
void Thread::Stop() {
// Cancel any outstanding wakeup events for this thread
Core::Timing::UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), callback_handle);
Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(),
callback_handle);
kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle);
callback_handle = 0;
@@ -67,17 +66,6 @@ void Thread::Stop() {
owner_process->FreeTLSSlot(tls_address);
}
void WaitCurrentThread_Sleep() {
Thread* thread = GetCurrentThread();
thread->SetStatus(ThreadStatus::WaitSleep);
}
void ExitCurrentThread() {
Thread* thread = GetCurrentThread();
thread->Stop();
Core::System::GetInstance().CurrentScheduler().RemoveThread(thread);
}
void Thread::WakeAfterDelay(s64 nanoseconds) {
// Don't schedule a wakeup if the thread wants to wait forever
if (nanoseconds == -1)
@@ -85,13 +73,14 @@ void Thread::WakeAfterDelay(s64 nanoseconds) {
// This function might be called from any thread so we have to be cautious and use the
// thread-safe version of ScheduleEvent.
Core::Timing::ScheduleEventThreadsafe(Core::Timing::nsToCycles(nanoseconds),
kernel.ThreadWakeupCallbackEventType(), callback_handle);
Core::System::GetInstance().CoreTiming().ScheduleEventThreadsafe(
Core::Timing::nsToCycles(nanoseconds), kernel.ThreadWakeupCallbackEventType(),
callback_handle);
}
void Thread::CancelWakeupTimer() {
Core::Timing::UnscheduleEventThreadsafe(kernel.ThreadWakeupCallbackEventType(),
callback_handle);
Core::System::GetInstance().CoreTiming().UnscheduleEventThreadsafe(
kernel.ThreadWakeupCallbackEventType(), callback_handle);
}
static std::optional<s32> GetNextProcessorId(u64 mask) {
@@ -182,14 +171,13 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
return ERR_INVALID_PROCESSOR_ID;
}
// TODO(yuriks): Other checks, returning 0xD9001BEA
if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) {
LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point);
// TODO (bunnei): Find the correct error code to use here
return ResultCode(-1);
}
auto& system = Core::System::GetInstance();
SharedPtr<Thread> thread(new Thread(kernel));
thread->thread_id = kernel.CreateNewThreadID();
@@ -198,7 +186,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
thread->stack_top = stack_top;
thread->tpidr_el0 = 0;
thread->nominal_priority = thread->current_priority = priority;
thread->last_running_ticks = Core::Timing::GetTicks();
thread->last_running_ticks = system.CoreTiming().GetTicks();
thread->processor_id = processor_id;
thread->ideal_core = processor_id;
thread->affinity_mask = 1ULL << processor_id;
@@ -209,7 +197,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
thread->name = std::move(name);
thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap();
thread->owner_process = &owner_process;
thread->scheduler = &Core::System::GetInstance().Scheduler(processor_id);
thread->scheduler = &system.Scheduler(processor_id);
thread->scheduler->AddThread(thread, priority);
thread->tls_address = thread->owner_process->MarkNextAvailableTLSSlotAsUsed(*thread);
@@ -258,7 +246,7 @@ void Thread::SetStatus(ThreadStatus new_status) {
}
if (status == ThreadStatus::Running) {
last_running_ticks = Core::Timing::GetTicks();
last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
}
status = new_status;
@@ -268,8 +256,8 @@ void Thread::AddMutexWaiter(SharedPtr<Thread> thread) {
if (thread->lock_owner == this) {
// If the thread is already waiting for this thread to release the mutex, ensure that the
// waiters list is consistent and return without doing anything.
auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
ASSERT(itr != wait_mutex_threads.end());
const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
ASSERT(iter != wait_mutex_threads.end());
return;
}
@@ -277,11 +265,16 @@ void Thread::AddMutexWaiter(SharedPtr<Thread> thread) {
ASSERT(thread->lock_owner == nullptr);
// Ensure that the thread is not already in the list of mutex waiters
auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
ASSERT(itr == wait_mutex_threads.end());
const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
ASSERT(iter == wait_mutex_threads.end());
// Keep the list in an ordered fashion
const auto insertion_point = std::find_if(
wait_mutex_threads.begin(), wait_mutex_threads.end(),
[&thread](const auto& entry) { return entry->GetPriority() > thread->GetPriority(); });
wait_mutex_threads.insert(insertion_point, thread);
thread->lock_owner = this;
wait_mutex_threads.emplace_back(std::move(thread));
UpdatePriority();
}
@@ -289,32 +282,43 @@ void Thread::RemoveMutexWaiter(SharedPtr<Thread> thread) {
ASSERT(thread->lock_owner == this);
// Ensure that the thread is in the list of mutex waiters
auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
ASSERT(itr != wait_mutex_threads.end());
const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
ASSERT(iter != wait_mutex_threads.end());
wait_mutex_threads.erase(iter);
boost::remove_erase(wait_mutex_threads, thread);
thread->lock_owner = nullptr;
UpdatePriority();
}
void Thread::UpdatePriority() {
// Find the highest priority among all the threads that are waiting for this thread's lock
// If any of the threads waiting on the mutex have a higher priority
// (taking into account priority inheritance), then this thread inherits
// that thread's priority.
u32 new_priority = nominal_priority;
for (const auto& thread : wait_mutex_threads) {
if (thread->nominal_priority < new_priority)
new_priority = thread->nominal_priority;
if (!wait_mutex_threads.empty()) {
if (wait_mutex_threads.front()->current_priority < new_priority) {
new_priority = wait_mutex_threads.front()->current_priority;
}
}
if (new_priority == current_priority)
if (new_priority == current_priority) {
return;
}
scheduler->SetThreadPriority(this, new_priority);
current_priority = new_priority;
if (!lock_owner) {
return;
}
// Ensure that the thread is within the correct location in the waiting list.
lock_owner->RemoveMutexWaiter(this);
lock_owner->AddMutexWaiter(this);
// Recursively update the priority of the thread that depends on the priority of this one.
if (lock_owner)
lock_owner->UpdatePriority();
lock_owner->UpdatePriority();
}
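In the Thread changes above, AddMutexWaiter now inserts waiters in priority order, which is what lets UpdatePriority consult only wait_mutex_threads.front() when deciding whether to inherit a priority. A compact sketch of that sorted-waiter idea with a hypothetical FakeThread type (lower value means higher priority, as in this code):

#include <algorithm>
#include <iostream>
#include <vector>

struct FakeThread {
    int nominal_priority;
    int current_priority;
    std::vector<FakeThread*> waiters; // kept sorted by current_priority

    void AddMutexWaiter(FakeThread* waiter) {
        // Insert before the first waiter with a lower (numerically higher) priority.
        const auto insertion_point =
            std::find_if(waiters.begin(), waiters.end(), [waiter](const FakeThread* entry) {
                return entry->current_priority > waiter->current_priority;
            });
        waiters.insert(insertion_point, waiter);
        UpdatePriority();
    }

    void UpdatePriority() {
        int new_priority = nominal_priority;
        // Because the list is sorted, the highest-priority waiter is at the front.
        if (!waiters.empty() && waiters.front()->current_priority < new_priority) {
            new_priority = waiters.front()->current_priority;
        }
        current_priority = new_priority;
    }
};

int main() {
    FakeThread owner{30, 30, {}};
    FakeThread waiter_low{40, 40, {}};
    FakeThread waiter_high{10, 10, {}};

    owner.AddMutexWaiter(&waiter_low);
    std::cout << owner.current_priority << '\n'; // 30: a lower-priority waiter changes nothing
    owner.AddMutexWaiter(&waiter_high);
    std::cout << owner.current_priority << '\n'; // 10: the owner inherits the higher priority
}

The real implementation additionally re-sorts itself inside its lock owner's list and recurses up the chain, but the front()-only check rests on the same sorted invariant shown here.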
void Thread::ChangeCore(u32 core, u64 mask) {
@@ -390,6 +394,14 @@ void Thread::SetActivity(ThreadActivity value) {
}
}
void Thread::Sleep(s64 nanoseconds) {
// Sleep current thread and check for next thread to schedule
SetStatus(ThreadStatus::WaitSleep);
// Create an event to wake the thread up after the specified nanosecond delay has passed
WakeAfterDelay(nanoseconds);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/**

View File

@@ -383,6 +383,9 @@ public:
void SetActivity(ThreadActivity value);
/// Sleeps this thread for the given amount of nanoseconds.
void Sleep(s64 nanoseconds);
private:
explicit Thread(KernelCore& kernel);
~Thread() override;
@@ -398,8 +401,14 @@ private:
VAddr entry_point = 0;
VAddr stack_top = 0;
u32 nominal_priority = 0; ///< Nominal thread priority, as set by the emulated application
u32 current_priority = 0; ///< Current thread priority, can be temporarily changed
/// Nominal thread priority, as set by the emulated application.
/// The nominal priority is the thread priority without priority
/// inheritance taken into account.
u32 nominal_priority = 0;
/// Current thread priority. This may change over the course of the
/// thread's lifetime in order to facilitate priority inheritance.
u32 current_priority = 0;
u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks.
u64 last_running_ticks = 0; ///< CPU tick when thread was last running
@@ -460,14 +469,4 @@ private:
*/
Thread* GetCurrentThread();
/**
* Waits the current thread on a sleep
*/
void WaitCurrentThread_Sleep();
/**
* Stops the current thread and removes it from the thread_list
*/
void ExitCurrentThread();
} // namespace Kernel

View File

@@ -7,18 +7,18 @@
#include <utility>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/memory_hook.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/file_sys/program_metadata.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/vm_manager.h"
#include "core/memory.h"
#include "core/memory_hook.h"
#include "core/memory_setup.h"
namespace Kernel {
static const char* GetMemoryStateName(MemoryState state) {
namespace {
const char* GetMemoryStateName(MemoryState state) {
static constexpr const char* names[] = {
"Unmapped", "Io",
"Normal", "CodeStatic",
@@ -35,6 +35,14 @@ static const char* GetMemoryStateName(MemoryState state) {
return names[ToSvcMemoryState(state)];
}
// Checks if a given address range lies within a larger address range.
constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
VAddr address_range_end) {
const VAddr end_address = address + size - 1;
return address_range_begin <= address && end_address <= address_range_end - 1;
}
} // Anonymous namespace
bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
ASSERT(base + size == next.base);
if (permissions != next.permissions || state != next.state || attribute != next.attribute ||
@@ -169,7 +177,7 @@ ResultVal<VAddr> VMManager::FindFreeRegion(u64 size) const {
ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u64 size,
MemoryState state,
Memory::MemoryHookPointer mmio_handler) {
Common::MemoryHookPointer mmio_handler) {
// This is the appropriately sized VMA that will turn into our allocation.
CASCADE_RESULT(VMAIter vma_handle, CarveVMA(target, size));
VirtualMemoryArea& final_vma = vma_handle->second;
@@ -249,8 +257,7 @@ ResultCode VMManager::ReprotectRange(VAddr target, u64 size, VMAPermission new_p
}
ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission perms) {
if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() ||
target + size < target) {
if (!IsWithinHeapRegion(target, size)) {
return ERR_INVALID_ADDRESS;
}
@@ -285,8 +292,7 @@ ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission p
}
ResultCode VMManager::HeapFree(VAddr target, u64 size) {
if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() ||
target + size < target) {
if (!IsWithinHeapRegion(target, size)) {
return ERR_INVALID_ADDRESS;
}
@@ -618,7 +624,7 @@ void VMManager::ClearPageTable() {
std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
page_table.special_regions.clear();
std::fill(page_table.attributes.begin(), page_table.attributes.end(),
Memory::PageType::Unmapped);
Common::PageType::Unmapped);
}
VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, MemoryState state_mask,
@@ -706,6 +712,11 @@ u64 VMManager::GetAddressSpaceWidth() const {
return address_space_width;
}
bool VMManager::IsWithinAddressSpace(VAddr address, u64 size) const {
return IsInsideAddressRange(address, size, GetAddressSpaceBaseAddress(),
GetAddressSpaceEndAddress());
}
VAddr VMManager::GetASLRRegionBaseAddress() const {
return aslr_region_base;
}
@@ -750,6 +761,11 @@ u64 VMManager::GetCodeRegionSize() const {
return code_region_end - code_region_base;
}
bool VMManager::IsWithinCodeRegion(VAddr address, u64 size) const {
return IsInsideAddressRange(address, size, GetCodeRegionBaseAddress(),
GetCodeRegionEndAddress());
}
VAddr VMManager::GetHeapRegionBaseAddress() const {
return heap_region_base;
}
@@ -762,6 +778,11 @@ u64 VMManager::GetHeapRegionSize() const {
return heap_region_end - heap_region_base;
}
bool VMManager::IsWithinHeapRegion(VAddr address, u64 size) const {
return IsInsideAddressRange(address, size, GetHeapRegionBaseAddress(),
GetHeapRegionEndAddress());
}
VAddr VMManager::GetMapRegionBaseAddress() const {
return map_region_base;
}
@@ -774,6 +795,10 @@ u64 VMManager::GetMapRegionSize() const {
return map_region_end - map_region_base;
}
bool VMManager::IsWithinMapRegion(VAddr address, u64 size) const {
return IsInsideAddressRange(address, size, GetMapRegionBaseAddress(), GetMapRegionEndAddress());
}
VAddr VMManager::GetNewMapRegionBaseAddress() const {
return new_map_region_base;
}
@@ -786,6 +811,11 @@ u64 VMManager::GetNewMapRegionSize() const {
return new_map_region_end - new_map_region_base;
}
bool VMManager::IsWithinNewMapRegion(VAddr address, u64 size) const {
return IsInsideAddressRange(address, size, GetNewMapRegionBaseAddress(),
GetNewMapRegionEndAddress());
}
VAddr VMManager::GetTLSIORegionBaseAddress() const {
return tls_io_region_base;
}
@@ -798,4 +828,9 @@ u64 VMManager::GetTLSIORegionSize() const {
return tls_io_region_end - tls_io_region_base;
}
bool VMManager::IsWithinTLSIORegion(VAddr address, u64 size) const {
return IsInsideAddressRange(address, size, GetTLSIORegionBaseAddress(),
GetTLSIORegionEndAddress());
}
} // namespace Kernel
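The IsInsideAddressRange helper relocated above computes an inclusive end_address for the queried block and compares it against an exclusive region end, so a block that ends exactly at the region boundary still passes. A few worked boundary cases against a verbatim copy of the helper:

#include <cstdint>

using VAddr = std::uint64_t;
using u64 = std::uint64_t;

// Copied from the anonymous-namespace helper shown in the diff above.
constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
                                    VAddr address_range_end) {
    const VAddr end_address = address + size - 1;
    return address_range_begin <= address && end_address <= address_range_end - 1;
}

// Region [0x1000, 0x2000): a block that ends exactly at 0x2000 is still inside...
static_assert(IsInsideAddressRange(0x1000, 0x1000, 0x1000, 0x2000), "block may fill the region");
// ...while one byte more spills past the region.
static_assert(!IsInsideAddressRange(0x1000, 0x1001, 0x1000, 0x2000), "overrun is rejected");
// A block starting below the region base is rejected regardless of size.
static_assert(!IsInsideAddressRange(0x0800, 0x100, 0x1000, 0x2000), "underrun is rejected");

int main() {}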

View File

@@ -9,9 +9,10 @@
#include <tuple>
#include <vector>
#include "common/common_types.h"
#include "common/memory_hook.h"
#include "common/page_table.h"
#include "core/hle/result.h"
#include "core/memory.h"
#include "core/memory_hook.h"
namespace FileSys {
enum class ProgramAddressSpaceType : u8;
@@ -290,7 +291,7 @@ struct VirtualMemoryArea {
// Settings for type = MMIO
/// Physical address of the register area this VMA maps to.
PAddr paddr = 0;
Memory::MemoryHookPointer mmio_handler = nullptr;
Common::MemoryHookPointer mmio_handler = nullptr;
/// Tests if this area can be merged to the right with `next`.
bool CanBeMergedWith(const VirtualMemoryArea& next) const;
@@ -368,7 +369,7 @@ public:
* @param mmio_handler The handler that will implement read and write for this MMIO region.
*/
ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u64 size, MemoryState state,
Memory::MemoryHookPointer mmio_handler);
Common::MemoryHookPointer mmio_handler);
/// Unmaps a range of addresses, splitting VMAs as necessary.
ResultCode UnmapRange(VAddr target, u64 size);
@@ -432,18 +433,21 @@ public:
/// Gets the address space width in bits.
u64 GetAddressSpaceWidth() const;
/// Determines whether or not the given address range lies within the address space.
bool IsWithinAddressSpace(VAddr address, u64 size) const;
/// Gets the base address of the ASLR region.
VAddr GetASLRRegionBaseAddress() const;
/// Gets the end address of the ASLR region.
VAddr GetASLRRegionEndAddress() const;
/// Determines whether or not the specified address range is within the ASLR region.
bool IsWithinASLRRegion(VAddr address, u64 size) const;
/// Gets the size of the ASLR region
u64 GetASLRRegionSize() const;
/// Determines whether or not the specified address range is within the ASLR region.
bool IsWithinASLRRegion(VAddr address, u64 size) const;
/// Gets the base address of the code region.
VAddr GetCodeRegionBaseAddress() const;
@@ -453,6 +457,9 @@ public:
/// Gets the total size of the code region in bytes.
u64 GetCodeRegionSize() const;
/// Determines whether or not the specified range is within the code region.
bool IsWithinCodeRegion(VAddr address, u64 size) const;
/// Gets the base address of the heap region.
VAddr GetHeapRegionBaseAddress() const;
@@ -462,6 +469,9 @@ public:
/// Gets the total size of the heap region in bytes.
u64 GetHeapRegionSize() const;
/// Determines whether or not the specified range is within the heap region.
bool IsWithinHeapRegion(VAddr address, u64 size) const;
/// Gets the base address of the map region.
VAddr GetMapRegionBaseAddress() const;
@@ -471,6 +481,9 @@ public:
/// Gets the total size of the map region in bytes.
u64 GetMapRegionSize() const;
/// Determines whether or not the specified range is within the map region.
bool IsWithinMapRegion(VAddr address, u64 size) const;
/// Gets the base address of the new map region.
VAddr GetNewMapRegionBaseAddress() const;
@@ -480,6 +493,9 @@ public:
/// Gets the total size of the new map region in bytes.
u64 GetNewMapRegionSize() const;
/// Determines whether or not the given address range is within the new map region.
bool IsWithinNewMapRegion(VAddr address, u64 size) const;
/// Gets the base address of the TLS IO region.
VAddr GetTLSIORegionBaseAddress() const;
@@ -489,9 +505,12 @@ public:
/// Gets the total size of the TLS IO region in bytes.
u64 GetTLSIORegionSize() const;
/// Determines if the given address range is within the TLS IO region.
bool IsWithinTLSIORegion(VAddr address, u64 size) const;
/// Each VMManager has its own page table, which is set as the main one when the owning process
/// is scheduled.
Memory::PageTable page_table;
Common::PageTable page_table{Memory::PAGE_BITS};
private:
using VMAIter = VMAMap::iterator;

View File

@@ -8,19 +8,10 @@
#include <utility>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
// All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes
/**
* Detailed description of the error. Code 0 always means success.
*/
enum class ErrorDescription : u32 {
Success = 0,
RemoteProcessDead = 301,
};
/**
* Identifies the module which caused the error. Error codes can be propagated through a call
* chain, meaning that this doesn't always correspond to the module where the API call made is
@@ -121,7 +112,7 @@ enum class ErrorModule : u32 {
ShopN = 811,
};
/// Encapsulates a CTR-OS error code, allowing it to be separated into its constituent fields.
/// Encapsulates a Horizon OS error code, allowing it to be separated into its constituent fields.
union ResultCode {
u32 raw;
@@ -134,17 +125,9 @@ union ResultCode {
constexpr explicit ResultCode(u32 raw) : raw(raw) {}
constexpr ResultCode(ErrorModule module, ErrorDescription description)
: ResultCode(module, static_cast<u32>(description)) {}
constexpr ResultCode(ErrorModule module_, u32 description_)
: raw(module.FormatValue(module_) | description.FormatValue(description_)) {}
constexpr ResultCode& operator=(const ResultCode& o) {
raw = o.raw;
return *this;
}
constexpr bool IsSuccess() const {
return raw == 0;
}
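The ResultCode diff above keeps the module and description bitfields while dropping the legacy ErrorDescription enum. As orientation only: the switchbrew page referenced in this file documents the packing as a 9-bit module in the low bits followed by a 13-bit description, and the sketch below reproduces that with plain shifts instead of the BitField members the real union uses. The example module number is hypothetical.

#include <cstdint>
#include <iostream>

// Field widths follow the switchbrew Error_codes layout referenced above
// (low 9 bits = module, next 13 bits = description).
constexpr std::uint32_t MakeResult(std::uint32_t module, std::uint32_t description) {
    return (module & 0x1FF) | ((description & 0x1FFF) << 9);
}

constexpr std::uint32_t ModuleOf(std::uint32_t raw) {
    return raw & 0x1FF;
}

constexpr std::uint32_t DescriptionOf(std::uint32_t raw) {
    return (raw >> 9) & 0x1FFF;
}

int main() {
    // Hypothetical module number purely for illustration; the real values live
    // in the ErrorModule enum in this header.
    constexpr std::uint32_t raw = MakeResult(/*module=*/100, /*description=*/2);
    std::cout << std::hex << raw << '\n';                                        // 464
    std::cout << std::dec << ModuleOf(raw) << ' ' << DescriptionOf(raw) << '\n'; // 100 2
    // A raw value of zero is the success case, matching IsSuccess() above.
}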

View File

@@ -2,10 +2,10 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <cinttypes>
#include <cstring>
#include <stack>
#include "audio_core/audio_renderer.h"
#include "core/core.h"
#include "core/file_sys/savedata_factory.h"
@@ -93,38 +93,84 @@ void IWindowController::AcquireForegroundRights(Kernel::HLERequestContext& ctx)
}
IAudioController::IAudioController() : ServiceFramework("IAudioController") {
// clang-format off
static const FunctionInfo functions[] = {
{0, &IAudioController::SetExpectedMasterVolume, "SetExpectedMasterVolume"},
{1, &IAudioController::GetMainAppletExpectedMasterVolume,
"GetMainAppletExpectedMasterVolume"},
{2, &IAudioController::GetLibraryAppletExpectedMasterVolume,
"GetLibraryAppletExpectedMasterVolume"},
{3, nullptr, "ChangeMainAppletMasterVolume"},
{4, nullptr, "SetTransparentVolumeRate"},
{1, &IAudioController::GetMainAppletExpectedMasterVolume, "GetMainAppletExpectedMasterVolume"},
{2, &IAudioController::GetLibraryAppletExpectedMasterVolume, "GetLibraryAppletExpectedMasterVolume"},
{3, &IAudioController::ChangeMainAppletMasterVolume, "ChangeMainAppletMasterVolume"},
{4, &IAudioController::SetTransparentAudioRate, "SetTransparentVolumeRate"},
};
// clang-format on
RegisterHandlers(functions);
}
IAudioController::~IAudioController() = default;
void IAudioController::SetExpectedMasterVolume(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_AM, "(STUBBED) called");
IPC::RequestParser rp{ctx};
const float main_applet_volume_tmp = rp.Pop<float>();
const float library_applet_volume_tmp = rp.Pop<float>();
LOG_DEBUG(Service_AM, "called. main_applet_volume={}, library_applet_volume={}",
main_applet_volume_tmp, library_applet_volume_tmp);
// Ensure the volume values remain within the 0-100% range
main_applet_volume = std::clamp(main_applet_volume_tmp, min_allowed_volume, max_allowed_volume);
library_applet_volume =
std::clamp(library_applet_volume_tmp, min_allowed_volume, max_allowed_volume);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void IAudioController::GetMainAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_AM, "(STUBBED) called");
LOG_DEBUG(Service_AM, "called. main_applet_volume={}", main_applet_volume);
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push(volume);
rb.Push(main_applet_volume);
}
void IAudioController::GetLibraryAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_AM, "(STUBBED) called");
LOG_DEBUG(Service_AM, "called. library_applet_volume={}", library_applet_volume);
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push(volume);
rb.Push(library_applet_volume);
}
void IAudioController::ChangeMainAppletMasterVolume(Kernel::HLERequestContext& ctx) {
struct Parameters {
float volume;
s64 fade_time_ns;
};
static_assert(sizeof(Parameters) == 16);
IPC::RequestParser rp{ctx};
const auto parameters = rp.PopRaw<Parameters>();
LOG_DEBUG(Service_AM, "called. volume={}, fade_time_ns={}", parameters.volume,
parameters.fade_time_ns);
main_applet_volume = std::clamp(parameters.volume, min_allowed_volume, max_allowed_volume);
fade_time_ns = std::chrono::nanoseconds{parameters.fade_time_ns};
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void IAudioController::SetTransparentAudioRate(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const float transparent_volume_rate_tmp = rp.Pop<float>();
LOG_DEBUG(Service_AM, "called. transparent_volume_rate={}", transparent_volume_rate_tmp);
// Clamp volume range to 0-100%.
transparent_volume_rate =
std::clamp(transparent_volume_rate_tmp, min_allowed_volume, max_allowed_volume);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
IDisplayController::IDisplayController() : ServiceFramework("IDisplayController") {

View File

@@ -4,6 +4,7 @@
#pragma once
#include <chrono>
#include <memory>
#include <queue>
#include "core/hle/kernel/writable_event.h"
@@ -81,8 +82,21 @@ private:
void SetExpectedMasterVolume(Kernel::HLERequestContext& ctx);
void GetMainAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx);
void GetLibraryAppletExpectedMasterVolume(Kernel::HLERequestContext& ctx);
void ChangeMainAppletMasterVolume(Kernel::HLERequestContext& ctx);
void SetTransparentAudioRate(Kernel::HLERequestContext& ctx);
u32 volume{100};
static constexpr float min_allowed_volume = 0.0f;
static constexpr float max_allowed_volume = 1.0f;
float main_applet_volume{0.25f};
float library_applet_volume{max_allowed_volume};
float transparent_volume_rate{min_allowed_volume};
// Volume transition fade time in nanoseconds.
// e.g. if the main applet volume was 0% and was changed to 50%
// with a fade of 50ns, then over the course of 50ns,
// the volume will gradually fade up to 50%.
std::chrono::nanoseconds fade_time_ns{0};
};
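The fade_time_ns comment above describes a volume that ramps linearly from its current value to the new target over the fade window. A minimal sketch of that interpolation, with a hypothetical FadedVolume helper that is not part of the service:

#include <algorithm>
#include <chrono>
#include <iostream>

// Returns the volume at `elapsed` into a linear fade from `from` to `to`
// spread over `fade_time`. Hypothetical helper for illustration only.
float FadedVolume(float from, float to, std::chrono::nanoseconds fade_time,
                  std::chrono::nanoseconds elapsed) {
    if (fade_time.count() <= 0 || elapsed >= fade_time) {
        return to;
    }
    const float t = static_cast<float>(elapsed.count()) / static_cast<float>(fade_time.count());
    return from + (to - from) * std::clamp(t, 0.0f, 1.0f);
}

int main() {
    using std::chrono::nanoseconds;
    // The example from the comment: 0% -> 50% over a 50 ns fade.
    std::cout << FadedVolume(0.0f, 0.5f, nanoseconds{50}, nanoseconds{25}) << '\n'; // 0.25
    std::cout << FadedVolume(0.0f, 0.5f, nanoseconds{50}, nanoseconds{50}) << '\n'; // 0.5
}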
class IDisplayController final : public ServiceFramework<IDisplayController> {

View File

@@ -7,6 +7,7 @@
#include "common/string_util.h"
#include "core/core.h"
#include "core/frontend/applets/software_keyboard.h"
#include "core/hle/result.h"
#include "core/hle/service/am/am.h"
#include "core/hle/service/am/applets/software_keyboard.h"

View File

@@ -9,10 +9,13 @@
#include <vector>
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/swap.h"
#include "core/hle/service/am/am.h"
#include "core/hle/service/am/applets/applets.h"
union ResultCode;
namespace Service::AM::Applets {
enum class KeysetDisable : u32 {

View File

@@ -18,17 +18,11 @@
#include "core/hle/kernel/readable_event.h"
#include "core/hle/kernel/writable_event.h"
#include "core/hle/service/audio/audout_u.h"
#include "core/hle/service/audio/errors.h"
#include "core/memory.h"
namespace Service::Audio {
namespace ErrCodes {
enum {
ErrorUnknown = 2,
BufferCountExceeded = 8,
};
}
constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}};
constexpr int DefaultSampleRate{48000};
@@ -68,12 +62,12 @@ public:
RegisterHandlers(functions);
// This is the event handle used to check if the audio buffer was released
auto& kernel = Core::System::GetInstance().Kernel();
buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
"IAudioOutBufferReleased");
auto& system = Core::System::GetInstance();
buffer_event = Kernel::WritableEvent::CreateEventPair(
system.Kernel(), Kernel::ResetType::Sticky, "IAudioOutBufferReleased");
stream = audio_core.OpenStream(audio_params.sample_rate, audio_params.channel_count,
std::move(unique_name),
stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate,
audio_params.channel_count, std::move(unique_name),
[=]() { buffer_event.writable->Signal(); });
}
@@ -100,7 +94,7 @@ private:
if (stream->IsPlaying()) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::ErrorUnknown));
rb.Push(ERR_OPERATION_FAILED);
return;
}
@@ -113,7 +107,9 @@ private:
void StopAudioOut(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "called");
audio_core.StopStream(stream);
if (stream->IsPlaying()) {
audio_core.StopStream(stream);
}
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
@@ -143,7 +139,8 @@ private:
if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::BufferCountExceeded));
rb.Push(ERR_BUFFER_COUNT_EXCEEDED);
return;
}
IPC::ResponseBuilder rb{ctx, 2};

View File

@@ -17,6 +17,7 @@
#include "core/hle/kernel/readable_event.h"
#include "core/hle/kernel/writable_event.h"
#include "core/hle/service/audio/audren_u.h"
#include "core/hle/service/audio/errors.h"
namespace Service::Audio {
@@ -37,15 +38,16 @@ public:
{8, &IAudioRenderer::SetRenderingTimeLimit, "SetRenderingTimeLimit"},
{9, &IAudioRenderer::GetRenderingTimeLimit, "GetRenderingTimeLimit"},
{10, &IAudioRenderer::RequestUpdateImpl, "RequestUpdateAuto"},
{11, nullptr, "ExecuteAudioRendererRendering"},
{11, &IAudioRenderer::ExecuteAudioRendererRendering, "ExecuteAudioRendererRendering"},
};
// clang-format on
RegisterHandlers(functions);
auto& kernel = Core::System::GetInstance().Kernel();
system_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
"IAudioRenderer:SystemEvent");
renderer = std::make_unique<AudioCore::AudioRenderer>(audren_params, system_event.writable);
auto& system = Core::System::GetInstance();
system_event = Kernel::WritableEvent::CreateEventPair(
system.Kernel(), Kernel::ResetType::Sticky, "IAudioRenderer:SystemEvent");
renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params,
system_event.writable);
}
private:
@@ -137,6 +139,17 @@ private:
rb.Push(rendering_time_limit_percent);
}
void ExecuteAudioRendererRendering(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "called");
// This service command currently only reports an unsupported operation
// error code, or aborts. Given that, we just always return an error
// code in this case.
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ERR_NOT_SUPPORTED);
}
Kernel::EventPair system_event;
std::unique_ptr<AudioCore::AudioRenderer> renderer;
u32 rendering_time_limit_percent = 100;
@@ -234,7 +247,7 @@ AudRenU::AudRenU() : ServiceFramework("audren:u") {
{0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"},
{1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"},
{2, &AudRenU::GetAudioDeviceService, "GetAudioDeviceService"},
{3, nullptr, "OpenAudioRendererAuto"},
{3, &AudRenU::OpenAudioRendererAuto, "OpenAudioRendererAuto"},
{4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, "GetAudioDeviceServiceWithRevisionInfo"},
};
// clang-format on
@@ -247,12 +260,7 @@ AudRenU::~AudRenU() = default;
void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "called");
IPC::RequestParser rp{ctx};
auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
rb.PushIpcInterface<Audio::IAudioRenderer>(std::move(params));
OpenAudioRendererImpl(ctx);
}
void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
@@ -261,20 +269,20 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "called");
u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
buffer_sz += params.unknown_c * 1024;
buffer_sz += 0x940 * (params.unknown_c + 1);
buffer_sz += params.submix_count * 1024;
buffer_sz += 0x940 * (params.submix_count + 1);
buffer_sz += 0x3F0 * params.voice_count;
buffer_sz += Common::AlignUp(8 * (params.unknown_c + 1), 0x10);
buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10);
buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
buffer_sz +=
Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) *
(params.mix_buffer_count + 6),
0x40);
buffer_sz += Common::AlignUp(
(0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) *
(params.mix_buffer_count + 6),
0x40);
if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
u32 count = params.unknown_c + 1;
const u32 count = params.submix_count + 1;
u64 node_count = Common::AlignUp(count, 0x40);
u64 node_state_buffer_sz =
const u64 node_state_buffer_sz =
4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
u64 edge_matrix_buffer_sz = 0;
node_count = Common::AlignUp(count * count, 0x40);
@@ -288,19 +296,19 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
buffer_sz += 0xE0 * params.unknown_2c;
buffer_sz += 0xE0 * params.num_splitter_send_channels;
buffer_sz += 0x20 * params.splitter_count;
buffer_sz += Common::AlignUp(4 * params.unknown_2c, 0x10);
buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10);
}
buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
((params.voice_count * 256) | 0x40);
if (params.unknown_1c >= 1) {
if (params.performance_frame_count >= 1) {
output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
16 * params.voice_count + 16) +
0x658) *
(params.unknown_1c + 1) +
(params.performance_frame_count + 1) +
0xc0,
0x40) +
output_sz;
@@ -324,6 +332,12 @@ void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
rb.PushIpcInterface<Audio::IAudioDevice>();
}
void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "called");
OpenAudioRendererImpl(ctx);
}
void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_Audio, "(STUBBED) called");
@@ -334,6 +348,15 @@ void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& c
// based on the current revision
}
void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
rb.PushIpcInterface<IAudioRenderer>(params);
}
bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
switch (feature) {

View File

@@ -21,8 +21,11 @@ private:
void OpenAudioRenderer(Kernel::HLERequestContext& ctx);
void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx);
void GetAudioDeviceService(Kernel::HLERequestContext& ctx);
void OpenAudioRendererAuto(Kernel::HLERequestContext& ctx);
void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx);
void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx);
enum class AudioFeatures : u32 {
Splitter,
};

View File

@@ -0,0 +1,15 @@
// Copyright 2019 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "core/hle/result.h"
namespace Service::Audio {
constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::Audio, 2};
constexpr ResultCode ERR_BUFFER_COUNT_EXCEEDED{ErrorModule::Audio, 8};
constexpr ResultCode ERR_NOT_SUPPORTED{ErrorModule::Audio, 513};
} // namespace Service::Audio

View File

@@ -9,43 +9,32 @@
#include <opus.h>
#include "common/common_funcs.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/hle_ipc.h"
#include "core/hle/service/audio/hwopus.h"
namespace Service::Audio {
namespace {
struct OpusDeleter {
void operator()(void* ptr) const {
operator delete(ptr);
}
};
class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
using OpusDecoderPtr = std::unique_ptr<OpusDecoder, OpusDeleter>;
struct OpusPacketHeader {
// Packet size in bytes.
u32_be size;
// Indicates the final range of the codec's entropy coder.
u32_be final_range;
};
static_assert(sizeof(OpusPacketHeader) == 0x8, "OpusHeader is an invalid size");
class OpusDecoderStateBase {
public:
IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoder, OpusDeleter> decoder, u32 sample_rate,
u32 channel_count)
: ServiceFramework("IHardwareOpusDecoderManager"), decoder(std::move(decoder)),
sample_rate(sample_rate), channel_count(channel_count) {
// clang-format off
static const FunctionInfo functions[] = {
{0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
{1, nullptr, "SetContext"},
{2, nullptr, "DecodeInterleavedForMultiStreamOld"},
{3, nullptr, "SetContextForMultiStream"},
{4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
{5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
{6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
{7, nullptr, "DecodeInterleavedForMultiStream"},
};
// clang-format on
RegisterHandlers(functions);
}
private:
/// Describes extra behavior that may be asked of the decoding context.
enum class ExtraBehavior {
/// No extra behavior.
@@ -55,30 +44,36 @@ private:
ResetContext,
};
void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Audio, "called");
enum class PerfTime {
Disabled,
Enabled,
};
DecodeInterleavedHelper(ctx, nullptr, ExtraBehavior::None);
}
void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Audio, "called");
u64 performance = 0;
DecodeInterleavedHelper(ctx, &performance, ExtraBehavior::None);
}
void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Audio, "called");
IPC::RequestParser rp{ctx};
const auto extra_behavior =
rp.Pop<bool>() ? ExtraBehavior::ResetContext : ExtraBehavior::None;
u64 performance = 0;
DecodeInterleavedHelper(ctx, &performance, extra_behavior);
virtual ~OpusDecoderStateBase() = default;
// Decodes interleaved Opus packets. Optionally allows reporting time taken to
// perform the decoding, as well as any relevant extra behavior.
virtual void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
ExtraBehavior extra_behavior) = 0;
};
// Represents the decoder state for a non-multistream decoder.
class OpusDecoderState final : public OpusDecoderStateBase {
public:
explicit OpusDecoderState(OpusDecoderPtr decoder, u32 sample_rate, u32 channel_count)
: decoder{std::move(decoder)}, sample_rate{sample_rate}, channel_count{channel_count} {}
void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
ExtraBehavior extra_behavior) override {
if (perf_time == PerfTime::Disabled) {
DecodeInterleavedHelper(ctx, nullptr, extra_behavior);
} else {
u64 performance = 0;
DecodeInterleavedHelper(ctx, &performance, extra_behavior);
}
}
private:
void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance,
ExtraBehavior extra_behavior) {
u32 consumed = 0;
@@ -89,8 +84,7 @@ private:
ResetDecoderContext();
}
if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples,
performance)) {
if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) {
LOG_ERROR(Audio, "Failed to decode opus data");
IPC::ResponseBuilder rb{ctx, 2};
// TODO(ogniK): Use correct error code
@@ -109,27 +103,27 @@ private:
ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
}
bool Decoder_DecodeInterleaved(u32& consumed, u32& sample_count, const std::vector<u8>& input,
std::vector<opus_int16>& output, u64* out_performance_time) {
bool DecodeOpusData(u32& consumed, u32& sample_count, const std::vector<u8>& input,
std::vector<opus_int16>& output, u64* out_performance_time) const {
const auto start_time = std::chrono::high_resolution_clock::now();
const std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
if (sizeof(OpusHeader) > input.size()) {
if (sizeof(OpusPacketHeader) > input.size()) {
LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}",
sizeof(OpusHeader), input.size());
sizeof(OpusPacketHeader), input.size());
return false;
}
OpusHeader hdr{};
std::memcpy(&hdr, input.data(), sizeof(OpusHeader));
if (sizeof(OpusHeader) + static_cast<u32>(hdr.sz) > input.size()) {
OpusPacketHeader hdr{};
std::memcpy(&hdr, input.data(), sizeof(OpusPacketHeader));
if (sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size) > input.size()) {
LOG_ERROR(Audio, "Input does not fit in the opus header size. data_sz={}, input_sz={}",
sizeof(OpusHeader) + static_cast<u32>(hdr.sz), input.size());
sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size), input.size());
return false;
}
const auto frame = input.data() + sizeof(OpusHeader);
const auto frame = input.data() + sizeof(OpusPacketHeader);
const auto decoded_sample_count = opus_packet_get_nb_samples(
frame, static_cast<opus_int32>(input.size() - sizeof(OpusHeader)),
frame, static_cast<opus_int32>(input.size() - sizeof(OpusPacketHeader)),
static_cast<opus_int32>(sample_rate));
if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) {
LOG_ERROR(
@@ -141,18 +135,18 @@ private:
const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count));
const auto out_sample_count =
- opus_decode(decoder.get(), frame, hdr.sz, output.data(), frame_size, 0);
+ opus_decode(decoder.get(), frame, hdr.size, output.data(), frame_size, 0);
if (out_sample_count < 0) {
LOG_ERROR(Audio,
"Incorrect sample count received from opus_decode, "
"output_sample_count={}, frame_size={}, data_sz_from_hdr={}",
- out_sample_count, frame_size, static_cast<u32>(hdr.sz));
+ out_sample_count, frame_size, static_cast<u32>(hdr.size));
return false;
}
const auto end_time = std::chrono::high_resolution_clock::now() - start_time;
sample_count = out_sample_count;
- consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz);
+ consumed = static_cast<u32>(sizeof(OpusPacketHeader) + hdr.size);
if (out_performance_time != nullptr) {
*out_performance_time =
std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count();
@@ -167,21 +161,66 @@ private:
opus_decoder_ctl(decoder.get(), OPUS_RESET_STATE);
}
- struct OpusHeader {
- u32_be sz; // Needs to be BE for some odd reason
- INSERT_PADDING_WORDS(1);
- };
- static_assert(sizeof(OpusHeader) == 0x8, "OpusHeader is an invalid size");
- std::unique_ptr<OpusDecoder, OpusDeleter> decoder;
+ OpusDecoderPtr decoder;
u32 sample_rate;
u32 channel_count;
};
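OpusDecoderPtr above replaces the previously spelled-out std::unique_ptr<OpusDecoder, OpusDeleter>. Because the decoder storage is obtained with a raw operator new sized by opus_decoder_get_size (see OpenOpusDecoder below), the deleter has to release that storage with operator delete rather than a plain delete. The alias and deleter are not shown in this hunk; a minimal sketch of what they presumably look like:

#include <memory>
#include <opus.h>

// Assumed shape; the real definitions live earlier in the file.
struct OpusDeleter {
    void operator()(OpusDecoder* ptr) const {
        // The storage came from operator new, not from new OpusDecoder,
        // so it is released with operator delete (no destructor to run).
        operator delete(ptr);
    }
};
using OpusDecoderPtr = std::unique_ptr<OpusDecoder, OpusDeleter>;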
- static std::size_t WorkerBufferSize(u32 channel_count) {
class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
public:
explicit IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoderStateBase> decoder_state)
: ServiceFramework("IHardwareOpusDecoderManager"), decoder_state{std::move(decoder_state)} {
// clang-format off
static const FunctionInfo functions[] = {
{0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
{1, nullptr, "SetContext"},
{2, nullptr, "DecodeInterleavedForMultiStreamOld"},
{3, nullptr, "SetContextForMultiStream"},
{4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
{5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
{6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
{7, nullptr, "DecodeInterleavedForMultiStream"},
};
// clang-format on
RegisterHandlers(functions);
}
private:
void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Audio, "called");
decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Disabled,
OpusDecoderStateBase::ExtraBehavior::None);
}
void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Audio, "called");
decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Enabled,
OpusDecoderStateBase::ExtraBehavior::None);
}
void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Audio, "called");
IPC::RequestParser rp{ctx};
const auto extra_behavior = rp.Pop<bool>()
? OpusDecoderStateBase::ExtraBehavior::ResetContext
: OpusDecoderStateBase::ExtraBehavior::None;
decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Enabled,
extra_behavior);
}
std::unique_ptr<OpusDecoderStateBase> decoder_state;
};
+ std::size_t WorkerBufferSize(u32 channel_count) {
ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
return opus_decoder_get_size(static_cast<int>(channel_count));
}
} // Anonymous namespace
void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
@@ -220,8 +259,7 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
const std::size_t worker_sz = WorkerBufferSize(channel_count);
ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too small");
- std::unique_ptr<OpusDecoder, OpusDeleter> decoder{
- static_cast<OpusDecoder*>(operator new(worker_sz))};
+ OpusDecoderPtr decoder{static_cast<OpusDecoder*>(operator new(worker_sz))};
if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) {
LOG_ERROR(Audio, "Failed to init opus decoder with error={}", err);
IPC::ResponseBuilder rb{ctx, 2};
@@ -232,8 +270,8 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
- rb.PushIpcInterface<IHardwareOpusDecoderManager>(std::move(decoder), sample_rate,
- channel_count);
+ rb.PushIpcInterface<IHardwareOpusDecoderManager>(
+ std::make_unique<OpusDecoderState>(std::move(decoder), sample_rate, channel_count));
}
HwOpus::HwOpus() : ServiceFramework("hwopus") {
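Taken together, the decode path above maps onto the plain libopus C API: size the decoder with opus_decoder_get_size, construct it in raw storage with opus_decoder_init, ask opus_packet_get_nb_samples how large the output will be, then call opus_decode. A self-contained sketch of that sequence using only documented libopus calls (the function and variable names here are illustrative, not part of the service code):

#include <cstddef>
#include <new>
#include <vector>
#include <opus.h>

// Decode a single Opus packet into interleaved 16-bit PCM.
// Returns true on success; sample_rate/channels must match the stream.
bool DecodePacketExample(const unsigned char* packet, opus_int32 packet_len,
                         opus_int32 sample_rate, int channels,
                         std::vector<opus_int16>& pcm_out) {
    // Allocate raw storage of the size libopus asks for, then initialize the
    // decoder in place (mirrors the operator new + opus_decoder_init pattern
    // in OpenOpusDecoder above).
    const int decoder_size = opus_decoder_get_size(channels);
    auto* decoder = static_cast<OpusDecoder*>(operator new(static_cast<std::size_t>(decoder_size)));
    if (opus_decoder_init(decoder, sample_rate, channels) != OPUS_OK) {
        operator delete(decoder);
        return false;
    }

    // Number of samples per channel this packet will decode to.
    const int nb_samples = opus_packet_get_nb_samples(packet, packet_len, sample_rate);
    if (nb_samples < 0) {
        operator delete(decoder);
        return false;
    }

    pcm_out.resize(static_cast<std::size_t>(nb_samples) * static_cast<std::size_t>(channels));
    const int decoded =
        opus_decode(decoder, packet, packet_len, pcm_out.data(), nb_samples, 0);

    operator delete(decoder);
    return decoded > 0;
}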


@@ -733,7 +733,10 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
FSP_SRV::~FSP_SRV() = default;
void FSP_SRV::SetCurrentProcess(Kernel::HLERequestContext& ctx) {
- LOG_WARNING(Service_FS, "(STUBBED) called");
+ IPC::RequestParser rp{ctx};
+ current_process_id = rp.Pop<u64>();
+ LOG_DEBUG(Service_FS, "called. current_process_id=0x{:016X}", current_process_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);


@@ -32,6 +32,7 @@ private:
void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
FileSys::VirtualFile romfs;
+ u64 current_process_id = 0;
};
} // namespace Service::FileSystem


@@ -7,6 +7,10 @@
#include "common/common_types.h"
#include "common/swap.h"
+ namespace Core::Timing {
+ class CoreTiming;
+ }
namespace Service::HID {
class ControllerBase {
public:
@@ -20,7 +24,8 @@ public:
virtual void OnRelease() = 0;
// When the controller is requesting an update for the shared memory
- virtual void OnUpdate(u8* data, std::size_t size) = 0;
+ virtual void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
+ std::size_t size) = 0;
// Called when input devices should be loaded
virtual void OnLoadInputDevices() = 0;
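The OnUpdate change above threads the CoreTiming instance through every controller's update hook instead of having controllers reach for a global. A hypothetical controller written against the interface fragment visible in this hunk would look roughly like this (the class name and empty bodies are made up for illustration, and the real ControllerBase may declare additional pure-virtual methods, such as an init hook, that a concrete controller must also override):

// Illustrative only; assumes the header declaring Service::HID::ControllerBase
// and the Core::Timing::CoreTiming forward declaration shown above.
class Controller_Example final : public Service::HID::ControllerBase {
public:
    void OnRelease() override {}

    void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
                  std::size_t size) override {
        // The timing source now arrives as a parameter, so any timestamps
        // written into the shared-memory block at data/size come from
        // core_timing rather than from global state.
    }

    void OnLoadInputDevices() override {}
};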

Some files were not shown because too many files have changed in this diff.