Compare commits

...

57 Commits

Author SHA1 Message Date
Markus Wick
dce624e3f1 core: Use a raw pointer in GetGPUDebugContext.
This helper is called very often. The memory ownership shall not be transferred, so just return the raw pointer.
2018-09-04 14:10:05 +02:00
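A minimal sketch of the pattern this commit describes, with a hypothetical DebugContext stand-in rather than the real video_core type: the System keeps an owning std::shared_ptr, and the hot-path accessor hands out a non-owning raw pointer instead of copying the shared_ptr (and bumping its refcount) on every call.

```cpp
#include <memory>

// Hypothetical stand-in for Tegra::DebugContext.
struct DebugContext {
    bool active = false;
};

class System {
public:
    void SetGPUDebugContext(std::shared_ptr<DebugContext> context) {
        debug_context = std::move(context); // ownership stays here
    }

    // Hot path: no shared_ptr copy, just a non-owning view.
    DebugContext* GetGPUDebugContext() const {
        return debug_context.get();
    }

private:
    std::shared_ptr<DebugContext> debug_context;
};
```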
Markus Wick
2081ed7db2 command_processor: Use std::array for bound_engines.
subchannel is a 3-bit field, so there can never be more than 8 bound engines,
and using a hashmap for at most 8 values is overkill.
2018-09-04 14:10:05 +02:00
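A sketch of the idea, assuming a hypothetical EngineID enum (the real one lives in the Maxwell command processor): because the subchannel field is 3 bits wide, indices 0-7 are the only possible keys, so a fixed-size std::array can replace the hash map.

```cpp
#include <array>
#include <cstdint>

// Hypothetical engine identifier for illustration.
enum class EngineID : std::uint32_t { None = 0, Fermi2D, Maxwell3D, MaxwellDMA };

// subchannel is a 3-bit field, so at most 8 engines can be bound at once.
std::array<EngineID, 8> bound_engines{};

void BindEngine(std::uint32_t subchannel, EngineID engine) {
    bound_engines[subchannel & 0x7] = engine; // mask to the 3-bit range
}
```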
bunnei
1c5636e690 Merge pull request #1231 from lioncash/global
service: Migrate global named port map to the KernelCore class
2018-09-03 21:21:12 -04:00
bunnei
2afe8ac4a7 Merge pull request #1229 from lioncash/forward-decl
vfs_real: Forward declare IOFile
2018-09-03 21:20:34 -04:00
Mat M
9cfe2414cb Merge pull request #1233 from lioncash/dynarmic
externals: Update dynarmic to 0435ac2
2018-09-03 16:22:13 -04:00
Lioncash
c6fd56b00f externals: Update dynarmic to 0435ac2 2018-09-03 08:04:24 -04:00
Lioncash
a405373144 vfs_real: Forward declare IOFile
Eliminates the need to rebuild some source files if the file_util header
ever changes. This also uncovered some indirect inclusions, which have
also been fixed.
2018-09-02 12:38:14 -04:00
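A generic sketch of the technique, not the actual yuzu headers: the header only needs the name IOFile to declare a std::shared_ptr member, so a forward declaration replaces the include, and only the .cpp pulls in the full definition.

```cpp
// vfs_real.h (sketch)
#include <memory>

namespace FileUtil {
class IOFile; // forward declaration: no need to include file_util.h here
}

class RealVfsFile {
    std::shared_ptr<FileUtil::IOFile> backing; // a pointer member only needs the name
};

// vfs_real.cpp (sketch)
// #include "common/file_util.h"  // the complete type is only needed in the .cpp
```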
Lioncash
1242c1ec0a service: Migrate global named port map to the KernelCore class
Now that we have a class representing the kernel in some capacity, we
now have a place to put the named port map, so we move it over and get
rid of another piece of global state within the core.
2018-09-02 12:35:30 -04:00
bunnei
325f3e0693 Merge pull request #1213 from DarkLordZach/octopath-fs
filesystem/maxwell_3d: Various changes to boot Project Octopath Traveller
2018-09-02 10:49:18 -04:00
bunnei
89be49d2f3 Merge pull request #1215 from ogniK5377/texs-nodep-assert
Added assert for TEXS nodep
2018-09-02 10:48:27 -04:00
bunnei
2714d9e64c Merge pull request #1219 from jroweboy/less-artifacts
Build - Upload fewer artifacts
2018-09-02 10:48:03 -04:00
bunnei
d2ade27c3f Merge pull request #1220 from FearlessTobi/extensions-qol
yuzu: Display the unsupported GL extensions in the popup
2018-09-02 10:47:25 -04:00
bunnei
177c45e97d Merge pull request #1214 from ogniK5377/ipa-assert
Added better asserts to IPA, Renamed IPA modes to match mesa
2018-09-02 10:44:43 -04:00
bunnei
9c206fe94d Merge pull request #1216 from ogniK5377/ffma-assert
Added FFMA asserts and missing fields
2018-09-02 10:44:13 -04:00
bunnei
1ccc0457d5 Merge pull request #1218 from ogniK5377/fmul-assert
Added FMUL asserts
2018-09-02 10:43:48 -04:00
bunnei
7a439630bb Merge pull request #1228 from lioncash/construct
filesystem: Move dir retrieval after path checking in DeleteFile()
2018-09-02 10:43:09 -04:00
Lioncash
fda8f1da20 filesystem: Move dir retrieval after path checking in DeleteFile()
We don't need to do the lookup if the path is currently considered empty.
2018-09-02 09:20:17 -04:00
fearlessTobi
0f453488e2 citra_qt: Display the unsupported GL extensions in the popup 2018-09-01 19:01:53 +02:00
James Rowe
a0e1fbfe14 Build - Upload fewer artifacts
Appveyor has a limit on artifact retention, and we hit that limit all the
time, so lower the number of build artifacts to just the final zip.
2018-09-01 10:42:16 -06:00
David Marcec
60754b4728 Removed saturate assert
Unneeded as we already implement it
2018-09-01 19:33:32 +10:00
David Marcec
2edab4e840 Removed saturate assert
Saturate already implemented
2018-09-01 19:29:20 +10:00
David Marcec
2bc6abb9a1 Changed tab5980_0 default from 0 -> 1 2018-09-01 19:15:03 +10:00
David Marcec
6f8ed9508d Added FMUL asserts 2018-09-01 19:05:10 +10:00
David Marcec
b89fc407d7 Added FFMA asserts 2018-09-01 18:45:14 +10:00
David Marcec
948bc87a59 Added assert for TEXS nodep 2018-09-01 17:00:01 +10:00
David Marcec
ad3dca7e62 Added better asserts to IPA, Renamed IPA modes to match mesa
IpaMode is changed to IpaInterpMode
IpaMode is supposed to be 2 bits, not 3
Added IpaSampleMode
Added Saturate

Renamed modes based on
d27c791891/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp (L2530)
2018-09-01 16:34:27 +10:00
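A hedged sketch of what the narrower encoding might look like, using plain shifts and masks rather than yuzu's actual BitField helper. The mode names follow the commit's reference to mesa's gm107 emitter; the bit positions are made up for illustration and are not a verified hardware layout.

```cpp
#include <cstdint>

// Illustrative only: names follow the commit message, bit positions are invented.
enum class IpaInterpMode : std::uint32_t { Pass = 0, Multiply = 1, Constant = 2, Sc = 3 };
enum class IpaSampleMode : std::uint32_t { Default = 0, Centroid = 1, Offset = 2 };

struct IpaFields {
    std::uint64_t raw;

    // A 2-bit field can only hold 0-3; masking with 0x7 (3 bits) would let a
    // neighbouring bit leak into the decoded interpolation mode.
    IpaInterpMode InterpMode() const { return static_cast<IpaInterpMode>((raw >> 54) & 0x3); }
    IpaSampleMode SampleMode() const { return static_cast<IpaSampleMode>((raw >> 52) & 0x3); }
    bool Saturate() const { return ((raw >> 51) & 0x1) != 0; }
};
```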
Zach Hilman
f32e28c7b8 maxwell_3d: Use CoreTiming for query timestamp 2018-08-31 23:25:18 -04:00
Zach Hilman
19d0951ae6 filesystem: Implement OpenReadOnlySaveDataFilesystem 2018-08-31 23:19:49 -04:00
Zach Hilman
7939ea18e8 filesystem: Add OpenFileSystemWithPatch 2018-08-31 23:19:23 -04:00
bunnei
c69dc5acf9 Merge pull request #1196 from FearlessTobi/ccache-consistency
.travis: Use Citra's ccache for builds instead of yuzu's
2018-08-31 21:50:44 -04:00
bunnei
1c05c06e04 Merge pull request #1212 from lioncash/forward-decl
core/core: Replace includes with forward declarations where applicable
2018-08-31 21:50:12 -04:00
Lioncash
4a587b81b2 core/core: Replace includes with forward declarations where applicable
The follow-up to e2457418da, which
replaces most of the includes in the core header with forward declarations.

This makes it so that if any of the headers the core header was
previously including change, then no one will need to rebuild the bulk
of the core, due to core.h being quite a prevalent inclusion.

This should make turnaround for changes much faster for developers.
2018-08-31 16:30:14 -04:00
fearlessTobi
dc3cc0002c travis: use Citra's ccache 2018-08-31 20:13:26 +02:00
bunnei
42588493d5 Merge pull request #1205 from bunnei/improve-rasterizer-cache-2
Various fixes and improvements to rasterizer cache 2: Electric Boogaloo
2018-08-31 13:24:21 -04:00
bunnei
7f7eb29323 gl_rasterizer_cache: Use accurate framebuffer setting for accurate copies. 2018-08-31 13:07:28 -04:00
bunnei
123c065086 gl_rasterizer_cache: Also use reserve cache for RecreateSurface. 2018-08-31 13:07:28 -04:00
bunnei
9bc71fcc5f rasterizer_cache: Use boost::interval_map for a more accurate cache. 2018-08-31 13:07:28 -04:00
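A small self-contained sketch of why boost::icl::interval_map suits this kind of cache: overlapping cached address ranges are aggregated per interval, so a lookup sees exactly which byte ranges are covered and by how many entries. Names here are illustrative, not the rasterizer cache's real API.

```cpp
#include <boost/icl/interval_map.hpp>
#include <cstdint>
#include <iostream>

int main() {
    using IntervalMap = boost::icl::interval_map<std::uint64_t, int>;
    using Interval = IntervalMap::interval_type;

    IntervalMap cached_ranges;

    // Insert two overlapping cached regions; counts are aggregated on the overlap.
    cached_ranges += std::make_pair(Interval::right_open(0x1000, 0x3000), 1);
    cached_ranges += std::make_pair(Interval::right_open(0x2000, 0x4000), 1);

    // Yields three disjoint intervals:
    //   [0x1000,0x2000) -> 1, [0x2000,0x3000) -> 2, [0x3000,0x4000) -> 1
    for (const auto& entry : cached_ranges) {
        std::cout << std::hex << entry.first.lower() << "-" << entry.first.upper()
                  << " -> " << std::dec << entry.second << '\n';
    }
}
```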
bunnei
d647d9550c gl_renderer: Cache textures, framebuffers, and shaders based on CPU address. 2018-08-31 13:07:27 -04:00
bunnei
16d65182f9 gl_rasterizer: Fix issues with the rasterizer cache.
- Use a single cached page map.
- Fix calculation of ending page.
2018-08-31 13:07:27 -04:00
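A hedged illustration of the "ending page" fix described above: the last touched page must be derived from addr + size - 1, otherwise a range that ends exactly on a page boundary is attributed to one page too many. The page size constant is assumed.

```cpp
#include <cstdint>

constexpr std::uint64_t PAGE_BITS = 12; // assumed 4 KiB pages

// Pages [FirstPage, LastPage] are touched by the byte range [addr, addr + size).
constexpr std::uint64_t FirstPage(std::uint64_t addr) {
    return addr >> PAGE_BITS;
}

constexpr std::uint64_t LastPage(std::uint64_t addr, std::uint64_t size) {
    return (addr + size - 1) >> PAGE_BITS; // inclusive end avoids the boundary off-by-one
}

static_assert(FirstPage(0x1000) == 1);
static_assert(LastPage(0x1000, 0x1000) == 1); // [0x1000, 0x2000) touches only page 1
```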
greggameplayer
06578e89b2 Implement BC6H_UF16 & BC6H_SF16 (#1092)
* Implement BC6H_UF16 & BC6H_SF16
Required by ARMS

* correct coding style

* correct coding style part 2
2018-08-31 12:11:19 -04:00
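For context, BC6H is the BPTC half-float compressed texture format; in OpenGL its unsigned and signed variants are GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT and GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT. A hedged sketch of such a mapping follows; the PixelFormat values and FormatTuple struct are placeholders, not yuzu's exact types.

```cpp
// Sketch only: PixelFormat and FormatTuple are placeholders for illustration.
enum class PixelFormat { BC6H_UF16, BC6H_SF16 };

struct FormatTuple {
    unsigned internal_format;
    bool compressed;
};

// GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT = 0x8E8F
// GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT   = 0x8E8E
constexpr FormatTuple GetFormatTuple(PixelFormat format) {
    switch (format) {
    case PixelFormat::BC6H_UF16:
        return {0x8E8F, true};
    case PixelFormat::BC6H_SF16:
        return {0x8E8E, true};
    }
    return {0, false};
}
```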
bunnei
f08d24e9c0 Merge pull request #1204 from lioncash/pimpl
core: Make the main System class use the PImpl idiom
2018-08-31 11:31:20 -04:00
bunnei
6683bf50b5 Merge pull request #1207 from degasus/hotfix
Report correct shader size.
2018-08-31 11:21:15 -04:00
bunnei
e205e74e1f Merge pull request #1208 from Hexagon12/pred-comp-14
Add predicate comparison 14 (GreaterEqualWithNan)
2018-08-31 11:20:47 -04:00
Lioncash
e2457418da core: Make the main System class use the PImpl idiom
core.h is kind of a massive header in terms what it includes within
itself. It includes VFS utilities, kernel headers, file_sys header,
ARM-related headers, etc. This means that changing anything in the
headers included by core.h essentially requires you to rebuild almost
all of core.

Instead, we can modify the System class to use the PImpl idiom, which
allows us to move all of those headers to the cpp file and forward
declare the bulk of the types that would otherwise be included, reducing
compile times. This change specifically only performs the PImpl portion.
2018-08-31 07:16:57 -04:00
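A generic PImpl sketch of the shape the commit describes, with placeholder members rather than the real System internals: the header exposes only a forward-declared Impl, so changes to the implementation's includes no longer ripple through everything that includes core.h.

```cpp
// core.h (sketch)
#include <memory>
#include <string>

class System {
public:
    System();
    ~System(); // must be defined where Impl is a complete type
    void Run();

private:
    struct Impl;                // forward declaration only
    std::unique_ptr<Impl> impl; // heavy headers stay out of this header
};

// core.cpp (sketch)
// #include "heavy_kernel_headers.h"  // hypothetical: only the .cpp pays for these
struct System::Impl {
    std::string status_details;
};

System::System() : impl{std::make_unique<Impl>()} {}
System::~System() = default; // Impl is complete here, so unique_ptr can delete it
void System::Run() {}
```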
Markus Wick
5be8b7a362 Report correct shader size.
Seems like this was an oversight in regard to 1fd979f50a,
which changed GLShader::ProgramCode to a std::vector, so sizeof is wrong.
2018-08-31 09:56:37 +02:00
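A hedged illustration of the bug class the message describes: sizeof on a std::vector yields the size of the vector object itself (pointer plus bookkeeping), not the byte count of its elements, which has to be computed from size().

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

using ProgramCode = std::vector<std::uint64_t>;

std::size_t WrongShaderSize(const ProgramCode& code) {
    return sizeof(code); // size of the vector object (typically 24 bytes), not the data
}

std::size_t CorrectShaderSize(const ProgramCode& code) {
    return code.size() * sizeof(ProgramCode::value_type); // actual byte length of the shader
}
```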
Hexagon12
d626bc8c62 Added predicate comparison GreaterEqualWithNan 2018-08-31 10:40:18 +03:00
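A hedged C++ sketch of what a "WithNan" comparison typically means in the shader decompiler: the predicate also evaluates to true when either operand is NaN (an unordered comparison), whereas the plain comparison is false on NaN. The function names here are illustrative, not the emitted decompiler code.

```cpp
#include <cmath>

bool GreaterEqual(float a, float b) {
    return a >= b; // ordered: false if either operand is NaN
}

bool GreaterEqualWithNan(float a, float b) {
    return (a >= b) || std::isnan(a) || std::isnan(b); // unordered variant
}
```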
bunnei
26aaa86ece Merge pull request #1195 from FearlessTobi/port-gamelist-compat
yuzu: Show game compatibility in the game list (PR ported from Citra)
2018-08-30 21:34:43 -04:00
Laku
915ab81ec2 gl_shader_decompiler: Implement POPC (#1203)
* Implement POPC

* implement invert
2018-08-30 21:32:58 -04:00
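For reference, POPC is a population count. A hedged C++ sketch of the operation including the "invert" flag mentioned in the PR (the operand is bitwise-negated before counting); std::popcount requires C++20, and the helper name is illustrative.

```cpp
#include <bit>     // std::popcount (C++20)
#include <cstdint>

// Illustrative only: counts set bits, optionally on the inverted operand.
std::uint32_t Popc(std::uint32_t value, bool invert) {
    if (invert) {
        value = ~value;
    }
    return static_cast<std::uint32_t>(std::popcount(value));
}
```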
bunnei
d6accf96ff Merge pull request #1200 from bunnei/improve-ipa
gl_shader_decompiler: Improve IPA for Pass mode with Position attribute.
2018-08-30 10:31:26 -04:00
bunnei
5094dfa081 Merge pull request #1198 from lioncash/kernel
kernel: Eliminate kernel global state
2018-08-30 10:02:50 -04:00
bunnei
42ef40884f Merge pull request #1202 from FearlessTobi/port-3825
Port #3825 from Citra: "travis: share environment variables with Docker"
2018-08-30 09:54:32 -04:00
bunnei
6e73039eb5 Merge pull request #1172 from tech4me/impl_iadd3
Shaders: Implemented IADD3
2018-08-30 09:52:27 -04:00
fearlessTobi
78653f7339 Show game compatibility within yuzu 2018-08-29 15:42:53 +02:00
fearlessTobi
02dfbf961e Remove Citra specific variable 2018-08-29 15:29:37 +02:00
liushuyu
a2c97de929 travis: share env variables with Docker 2018-08-29 15:28:13 +02:00
bunnei
b1ccd88434 gl_shader_decompiler: Improve IPA for Pass mode with Position attribute. 2018-08-29 00:37:29 -04:00
Lioncash
0cbcd6ec9a kernel: Eliminate kernel global state
As a means to pave the way for getting rid of global state within core,
this eliminates kernel global state by removing all globals. Instead,
it introduces a KernelCore class which acts as a kernel instance. This
instance lives in the System class, which keeps its lifetime contained
to that of the System class.

This also forces the kernel types to actually interact with the main
kernel instance itself instead of having transient kernel state placed
all over several translation units, keeping everything together. It also
has a nice consequence of making dependencies much more explicit.

This also makes our initialization a tad bit more correct. Previously we
were creating a kernel process before the actual kernel was initialized,
which doesn't really make much sense.

The KernelCore class itself follows the PImpl idiom, which allows
keeping all the implementation details sealed away from everything else,
which forces the use of the exposed API and allows us to avoid any
unnecessary inclusions within the main kernel header.
2018-08-28 22:31:51 -04:00
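A minimal sketch of the ownership shape this message describes, with placeholder members: kernel state that used to be file-scope globals becomes members of a KernelCore instance owned by System, so its lifetime is bounded by the System that created it. Everything here besides the KernelCore/System names is hypothetical.

```cpp
#include <memory>
#include <string>
#include <unordered_map>

// Before: file-scope globals such as `HandleTable g_handle_table;` lived in kernel.cpp.
// After: equivalent state is owned by a kernel instance.

class KernelCore {
public:
    void Initialize() { named_ports.clear(); }
    void Shutdown() { named_ports.clear(); }

    void AddNamedPort(std::string name, int port_handle) { // int stands in for SharedPtr<ClientPort>
        named_ports.emplace(std::move(name), port_handle);
    }

private:
    std::unordered_map<std::string, int> named_ports; // placeholder for the real handle/port tables
};

class System {
public:
    KernelCore& Kernel() { return *kernel; }

private:
    std::unique_ptr<KernelCore> kernel = std::make_unique<KernelCore>(); // lifetime tied to System
};
```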
110 changed files with 1984 additions and 1201 deletions

View File

@@ -20,6 +20,7 @@ matrix:
install: "./.travis/linux/deps.sh"
script: "./.travis/linux/build.sh"
after_success: "./.travis/linux/upload.sh"
cache: ccache
- os: osx
env: NAME="macos build"
sudo: false
@@ -27,6 +28,7 @@ matrix:
install: "./.travis/macos/deps.sh"
script: "./.travis/macos/build.sh"
after_success: "./.travis/macos/upload.sh"
cache: ccache
deploy:
provider: releases
@@ -42,7 +44,3 @@ notifications:
webhooks:
urls:
- https://api.yuzu-emu.org/code/travis/notify
cache:
directories:
- $HOME/.ccache

View File

@@ -0,0 +1,12 @@
# List of environment variables to be shared with Docker containers
CI
TRAVIS
CONTINUOUS_INTEGRATION
TRAVIS_BRANCH
TRAVIS_BUILD_ID
TRAVIS_BUILD_NUMBER
TRAVIS_COMMIT
TRAVIS_JOB_ID
TRAVIS_JOB_NUMBER
TRAVIS_REPO_SLUG
TRAVIS_TAG

View File

@@ -1,3 +1,4 @@
#!/bin/bash -ex
docker run -e CCACHE_DIR=/ccache -v $HOME/.ccache:/ccache -v $(pwd):/yuzu ubuntu:18.04 /bin/bash /yuzu/.travis/linux/docker.sh
mkdir -p "$HOME/.ccache"
docker run --env-file .travis/common/travis-ci.env -v $(pwd):/yuzu -v "$HOME/.ccache":/root/.ccache ubuntu:18.04 /bin/bash /yuzu/.travis/linux/docker.sh

View File

@@ -5,14 +5,8 @@ apt-get install --no-install-recommends -y build-essential git libqt5opengl5-dev
cd /yuzu
export PATH=/usr/lib/ccache:$PATH
ln -sf /usr/bin/ccache /usr/lib/ccache/cc
ln -sf /usr/bin/ccache /usr/lib/ccache/c++
mkdir build && cd build
ccache --show-stats > ccache_before
cmake .. -DYUZU_BUILD_UNICORN=ON -DCMAKE_BUILD_TYPE=Release -G Ninja
cmake .. -DYUZU_BUILD_UNICORN=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -G Ninja
ninja
ccache --show-stats > ccache_after
diff -U100 ccache_before ccache_after || true
ctest -VV -C Release

View File

@@ -5,14 +5,11 @@ set -o pipefail
export MACOSX_DEPLOYMENT_TARGET=10.12
export Qt5_DIR=$(brew --prefix)/opt/qt5
export UNICORNDIR=$(pwd)/externals/unicorn
export PATH="/usr/local/opt/ccache/libexec:$PATH"
mkdir build && cd build
export PATH=/usr/local/opt/ccache/libexec:$PATH
ccache --show-stats > ccache_before
cmake --version
cmake .. -DYUZU_BUILD_UNICORN=ON -DCMAKE_BUILD_TYPE=Release
cmake .. -DYUZU_BUILD_UNICORN=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON
make -j4
ccache --show-stats > ccache_after
diff -U100 ccache_before ccache_after || true
ctest -VV -C Release

View File

@@ -41,6 +41,19 @@ function(check_submodules_present)
endfunction()
check_submodules_present()
configure_file(${CMAKE_SOURCE_DIR}/dist/compatibility_list/compatibility_list.qrc
${CMAKE_BINARY_DIR}/dist/compatibility_list/compatibility_list.qrc
COPYONLY)
if (ENABLE_COMPATIBILITY_LIST_DOWNLOAD AND NOT EXISTS ${CMAKE_BINARY_DIR}/dist/compatibility_list/compatibility_list.json)
message(STATUS "Downloading compatibility list for yuzu...")
file(DOWNLOAD
https://api.yuzu-emu.org/gamedb/
"${CMAKE_BINARY_DIR}/dist/compatibility_list/compatibility_list.json" SHOW_PROGRESS)
endif()
if (NOT EXISTS ${CMAKE_BINARY_DIR}/dist/compatibility_list/compatibility_list.json)
file(WRITE ${CMAKE_BINARY_DIR}/dist/compatibility_list/compatibility_list.json "")
endif()
# Detect current compilation architecture and create standard definitions
# =======================================================================

View File

@@ -41,9 +41,9 @@ before_build:
- ps: |
if ($env:BUILD_TYPE -eq 'msvc') {
# redirect stderr and change the exit code to prevent powershell from cancelling the build if cmake prints a warning
cmd /C 'cmake -G "Visual Studio 15 2017 Win64" -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_BUNDLED_SDL2=1 -DYUZU_USE_BUNDLED_UNICORN=1 .. 2>&1 && exit 0'
cmd /C 'cmake -G "Visual Studio 15 2017 Win64" -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_BUNDLED_SDL2=1 -DYUZU_USE_BUNDLED_UNICORN=1 -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON .. 2>&1 && exit 0'
} else {
C:\msys64\usr\bin\bash.exe -lc "cmake -G 'MSYS Makefiles' -DYUZU_BUILD_UNICORN=1 -DCMAKE_BUILD_TYPE=Release .. 2>&1"
C:\msys64\usr\bin\bash.exe -lc "cmake -G 'MSYS Makefiles' -DYUZU_BUILD_UNICORN=1 -DCMAKE_BUILD_TYPE=Release -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON .. 2>&1"
}
- cd ..
@@ -162,10 +162,6 @@ artifacts:
- path: $(BUILD_ZIP)
name: build
type: zip
- path: $(BUILD_SYMBOLS)
name: debugsymbols
- path: $(BUILD_UPDATE)
name: update
deploy:
provider: GitHub

View File

@@ -0,0 +1,5 @@
<RCC>
<qresource prefix="compatibility_list">
<file>compatibility_list.json</file>
</qresource>
</RCC>

View File

@@ -9,6 +9,7 @@
#include "common/logging/log.h"
#include "core/arm/dynarmic/arm_dynarmic.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_timing.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/svc.h"

View File

@@ -2,24 +2,36 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <map>
#include <memory>
#include <thread>
#include <utility>
#include "common/logging/log.h"
#include "common/string_util.h"
#include "core/arm/exclusive_monitor.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_timing.h"
#include "core/file_sys/mode.h"
#include "core/file_sys/vfs_concat.h"
#include "core/file_sys/vfs_real.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/service/service.h"
#include "core/hle/service/sm/controller.h"
#include "core/hle/service/sm/sm.h"
#include "core/loader/loader.h"
#include "core/perf_stats.h"
#include "core/settings.h"
#include "file_sys/vfs_concat.h"
#include "file_sys/vfs_real.h"
#include "core/telemetry_session.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/gpu.h"
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
@@ -27,71 +39,9 @@ namespace Core {
/*static*/ System System::s_instance;
System::System() = default;
System::~System() = default;
/// Runs a CPU core while the system is powered on
static void RunCpuCore(std::shared_ptr<Cpu> cpu_state) {
while (Core::System::GetInstance().IsPoweredOn()) {
cpu_state->RunLoop(true);
}
}
Cpu& System::CurrentCpuCore() {
// If multicore is enabled, use host thread to figure out the current CPU core
if (Settings::values.use_multi_core) {
const auto& search = thread_to_cpu.find(std::this_thread::get_id());
ASSERT(search != thread_to_cpu.end());
ASSERT(search->second);
return *search->second;
}
// Otherwise, use single-threaded mode active_core variable
return *cpu_cores[active_core];
}
System::ResultStatus System::RunLoop(bool tight_loop) {
status = ResultStatus::Success;
// Update thread_to_cpu in case Core 0 is run from a different host thread
thread_to_cpu[std::this_thread::get_id()] = cpu_cores[0];
if (GDBStub::IsServerEnabled()) {
GDBStub::HandlePacket();
// If the loop is halted and we want to step, use a tiny (1) number of instructions to
// execute. Otherwise, get out of the loop function.
if (GDBStub::GetCpuHaltFlag()) {
if (GDBStub::GetCpuStepFlag()) {
tight_loop = false;
} else {
return ResultStatus::Success;
}
}
}
for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) {
cpu_cores[active_core]->RunLoop(tight_loop);
if (Settings::values.use_multi_core) {
// Cores 1-3 are run on other threads in this mode
break;
}
}
if (GDBStub::IsServerEnabled()) {
GDBStub::SetCpuStepFlag(false);
}
return status;
}
System::ResultStatus System::SingleStep() {
return RunLoop(false);
}
static FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
const std::string& path) {
namespace {
FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
const std::string& path) {
// To account for split 00+01+etc files.
std::string dir_name;
std::string filename;
@@ -121,165 +71,402 @@ static FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem
return vfs->OpenFile(path, FileSys::Mode::Read);
}
/// Runs a CPU core while the system is powered on
void RunCpuCore(std::shared_ptr<Cpu> cpu_state) {
while (Core::System::GetInstance().IsPoweredOn()) {
cpu_state->RunLoop(true);
}
}
} // Anonymous namespace
struct System::Impl {
Cpu& CurrentCpuCore() {
if (Settings::values.use_multi_core) {
const auto& search = thread_to_cpu.find(std::this_thread::get_id());
ASSERT(search != thread_to_cpu.end());
ASSERT(search->second);
return *search->second;
}
// Otherwise, use single-threaded mode active_core variable
return *cpu_cores[active_core];
}
ResultStatus RunLoop(bool tight_loop) {
status = ResultStatus::Success;
// Update thread_to_cpu in case Core 0 is run from a different host thread
thread_to_cpu[std::this_thread::get_id()] = cpu_cores[0];
if (GDBStub::IsServerEnabled()) {
GDBStub::HandlePacket();
// If the loop is halted and we want to step, use a tiny (1) number of instructions to
// execute. Otherwise, get out of the loop function.
if (GDBStub::GetCpuHaltFlag()) {
if (GDBStub::GetCpuStepFlag()) {
tight_loop = false;
} else {
return ResultStatus::Success;
}
}
}
for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) {
cpu_cores[active_core]->RunLoop(tight_loop);
if (Settings::values.use_multi_core) {
// Cores 1-3 are run on other threads in this mode
break;
}
}
if (GDBStub::IsServerEnabled()) {
GDBStub::SetCpuStepFlag(false);
}
return status;
}
ResultStatus Init(Frontend::EmuWindow& emu_window) {
LOG_DEBUG(HW_Memory, "initialized OK");
CoreTiming::Init();
kernel.Initialize();
// Create a default fs if one doesn't already exist.
if (virtual_filesystem == nullptr)
virtual_filesystem = std::make_shared<FileSys::RealVfsFilesystem>();
current_process = Kernel::Process::Create(kernel, "main");
cpu_barrier = std::make_shared<CpuBarrier>();
cpu_exclusive_monitor = Cpu::MakeExclusiveMonitor(cpu_cores.size());
for (size_t index = 0; index < cpu_cores.size(); ++index) {
cpu_cores[index] = std::make_shared<Cpu>(cpu_exclusive_monitor, cpu_barrier, index);
}
telemetry_session = std::make_unique<Core::TelemetrySession>();
service_manager = std::make_shared<Service::SM::ServiceManager>();
Service::Init(service_manager, virtual_filesystem);
GDBStub::Init();
renderer = VideoCore::CreateRenderer(emu_window);
if (!renderer->Init()) {
return ResultStatus::ErrorVideoCore;
}
gpu_core = std::make_unique<Tegra::GPU>(renderer->Rasterizer());
// Create threads for CPU cores 1-3, and build thread_to_cpu map
// CPU core 0 is run on the main thread
thread_to_cpu[std::this_thread::get_id()] = cpu_cores[0];
if (Settings::values.use_multi_core) {
for (size_t index = 0; index < cpu_core_threads.size(); ++index) {
cpu_core_threads[index] =
std::make_unique<std::thread>(RunCpuCore, cpu_cores[index + 1]);
thread_to_cpu[cpu_core_threads[index]->get_id()] = cpu_cores[index + 1];
}
}
LOG_DEBUG(Core, "Initialized OK");
// Reset counters and set time origin to current frame
GetAndResetPerfStats();
perf_stats.BeginSystemFrame();
return ResultStatus::Success;
}
ResultStatus Load(Frontend::EmuWindow& emu_window, const std::string& filepath) {
app_loader = Loader::GetLoader(GetGameFileFromPath(virtual_filesystem, filepath));
if (!app_loader) {
LOG_CRITICAL(Core, "Failed to obtain loader for {}!", filepath);
return ResultStatus::ErrorGetLoader;
}
std::pair<boost::optional<u32>, Loader::ResultStatus> system_mode =
app_loader->LoadKernelSystemMode();
if (system_mode.second != Loader::ResultStatus::Success) {
LOG_CRITICAL(Core, "Failed to determine system mode (Error {})!",
static_cast<int>(system_mode.second));
return ResultStatus::ErrorSystemMode;
}
ResultStatus init_result{Init(emu_window)};
if (init_result != ResultStatus::Success) {
LOG_CRITICAL(Core, "Failed to initialize system (Error {})!",
static_cast<int>(init_result));
Shutdown();
return init_result;
}
const Loader::ResultStatus load_result{app_loader->Load(current_process)};
if (load_result != Loader::ResultStatus::Success) {
LOG_CRITICAL(Core, "Failed to load ROM (Error {})!", static_cast<int>(load_result));
Shutdown();
return static_cast<ResultStatus>(static_cast<u32>(ResultStatus::ErrorLoader) +
static_cast<u32>(load_result));
}
status = ResultStatus::Success;
return status;
}
void Shutdown() {
// Log last frame performance stats
auto perf_results = GetAndResetPerfStats();
Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
perf_results.emulation_speed * 100.0);
Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
perf_results.game_fps);
Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
perf_results.frametime * 1000.0);
// Shutdown emulation session
renderer.reset();
GDBStub::Shutdown();
Service::Shutdown();
service_manager.reset();
telemetry_session.reset();
gpu_core.reset();
// Close all CPU/threading state
cpu_barrier->NotifyEnd();
if (Settings::values.use_multi_core) {
for (auto& thread : cpu_core_threads) {
thread->join();
thread.reset();
}
}
thread_to_cpu.clear();
for (auto& cpu_core : cpu_cores) {
cpu_core.reset();
}
cpu_barrier.reset();
// Shutdown kernel and core timing
kernel.Shutdown();
CoreTiming::Shutdown();
// Close app loader
app_loader.reset();
LOG_DEBUG(Core, "Shutdown OK");
}
Loader::ResultStatus GetGameName(std::string& out) const {
if (app_loader == nullptr)
return Loader::ResultStatus::ErrorNotInitialized;
return app_loader->ReadTitle(out);
}
void SetStatus(ResultStatus new_status, const char* details = nullptr) {
status = new_status;
if (details) {
status_details = details;
}
}
PerfStatsResults GetAndResetPerfStats() {
return perf_stats.GetAndResetStats(CoreTiming::GetGlobalTimeUs());
}
Kernel::KernelCore kernel;
/// RealVfsFilesystem instance
FileSys::VirtualFilesystem virtual_filesystem;
/// AppLoader used to load the current executing application
std::unique_ptr<Loader::AppLoader> app_loader;
std::unique_ptr<VideoCore::RendererBase> renderer;
std::unique_ptr<Tegra::GPU> gpu_core;
std::shared_ptr<Tegra::DebugContext> debug_context;
Kernel::SharedPtr<Kernel::Process> current_process;
std::shared_ptr<ExclusiveMonitor> cpu_exclusive_monitor;
std::shared_ptr<CpuBarrier> cpu_barrier;
std::array<std::shared_ptr<Cpu>, NUM_CPU_CORES> cpu_cores;
std::array<std::unique_ptr<std::thread>, NUM_CPU_CORES - 1> cpu_core_threads;
size_t active_core{}; ///< Active core, only used in single thread mode
/// Service manager
std::shared_ptr<Service::SM::ServiceManager> service_manager;
/// Telemetry session for this emulation session
std::unique_ptr<Core::TelemetrySession> telemetry_session;
ResultStatus status = ResultStatus::Success;
std::string status_details = "";
/// Map of guest threads to CPU cores
std::map<std::thread::id, std::shared_ptr<Cpu>> thread_to_cpu;
Core::PerfStats perf_stats;
Core::FrameLimiter frame_limiter;
};
System::System() : impl{std::make_unique<Impl>()} {}
System::~System() = default;
Cpu& System::CurrentCpuCore() {
return impl->CurrentCpuCore();
}
System::ResultStatus System::RunLoop(bool tight_loop) {
return impl->RunLoop(tight_loop);
}
System::ResultStatus System::SingleStep() {
return RunLoop(false);
}
void System::InvalidateCpuInstructionCaches() {
for (auto& cpu : impl->cpu_cores) {
cpu->ArmInterface().ClearInstructionCache();
}
}
System::ResultStatus System::Load(Frontend::EmuWindow& emu_window, const std::string& filepath) {
app_loader = Loader::GetLoader(GetGameFileFromPath(virtual_filesystem, filepath));
return impl->Load(emu_window, filepath);
}
if (!app_loader) {
LOG_CRITICAL(Core, "Failed to obtain loader for {}!", filepath);
return ResultStatus::ErrorGetLoader;
}
std::pair<boost::optional<u32>, Loader::ResultStatus> system_mode =
app_loader->LoadKernelSystemMode();
if (system_mode.second != Loader::ResultStatus::Success) {
LOG_CRITICAL(Core, "Failed to determine system mode (Error {})!",
static_cast<int>(system_mode.second));
return ResultStatus::ErrorSystemMode;
}
ResultStatus init_result{Init(emu_window)};
if (init_result != ResultStatus::Success) {
LOG_CRITICAL(Core, "Failed to initialize system (Error {})!",
static_cast<int>(init_result));
System::Shutdown();
return init_result;
}
const Loader::ResultStatus load_result{app_loader->Load(current_process)};
if (load_result != Loader::ResultStatus::Success) {
LOG_CRITICAL(Core, "Failed to load ROM (Error {})!", static_cast<int>(load_result));
System::Shutdown();
return static_cast<ResultStatus>(static_cast<u32>(ResultStatus::ErrorLoader) +
static_cast<u32>(load_result));
}
status = ResultStatus::Success;
return status;
bool System::IsPoweredOn() const {
return impl->cpu_barrier && impl->cpu_barrier->IsAlive();
}
void System::PrepareReschedule() {
CurrentCpuCore().PrepareReschedule();
}
PerfStats::Results System::GetAndResetPerfStats() {
return perf_stats.GetAndResetStats(CoreTiming::GetGlobalTimeUs());
PerfStatsResults System::GetAndResetPerfStats() {
return impl->GetAndResetPerfStats();
}
Core::TelemetrySession& System::TelemetrySession() const {
return *impl->telemetry_session;
}
ARM_Interface& System::CurrentArmInterface() {
return CurrentCpuCore().ArmInterface();
}
size_t System::CurrentCoreIndex() {
return CurrentCpuCore().CoreIndex();
}
Kernel::Scheduler& System::CurrentScheduler() {
return *CurrentCpuCore().Scheduler();
}
const std::shared_ptr<Kernel::Scheduler>& System::Scheduler(size_t core_index) {
ASSERT(core_index < NUM_CPU_CORES);
return cpu_cores[core_index]->Scheduler();
return impl->cpu_cores[core_index]->Scheduler();
}
Kernel::SharedPtr<Kernel::Process>& System::CurrentProcess() {
return impl->current_process;
}
ARM_Interface& System::ArmInterface(size_t core_index) {
ASSERT(core_index < NUM_CPU_CORES);
return cpu_cores[core_index]->ArmInterface();
return impl->cpu_cores[core_index]->ArmInterface();
}
Cpu& System::CpuCore(size_t core_index) {
ASSERT(core_index < NUM_CPU_CORES);
return *cpu_cores[core_index];
return *impl->cpu_cores[core_index];
}
ExclusiveMonitor& System::Monitor() {
return *impl->cpu_exclusive_monitor;
}
Tegra::GPU& System::GPU() {
return *impl->gpu_core;
}
const Tegra::GPU& System::GPU() const {
return *impl->gpu_core;
}
VideoCore::RendererBase& System::Renderer() {
return *impl->renderer;
}
const VideoCore::RendererBase& System::Renderer() const {
return *impl->renderer;
}
Kernel::KernelCore& System::Kernel() {
return impl->kernel;
}
const Kernel::KernelCore& System::Kernel() const {
return impl->kernel;
}
Core::PerfStats& System::GetPerfStats() {
return impl->perf_stats;
}
const Core::PerfStats& System::GetPerfStats() const {
return impl->perf_stats;
}
Core::FrameLimiter& System::FrameLimiter() {
return impl->frame_limiter;
}
const Core::FrameLimiter& System::FrameLimiter() const {
return impl->frame_limiter;
}
Loader::ResultStatus System::GetGameName(std::string& out) const {
return impl->GetGameName(out);
}
void System::SetStatus(ResultStatus new_status, const char* details) {
impl->SetStatus(new_status, details);
}
const std::string& System::GetStatusDetails() const {
return impl->status_details;
}
Loader::AppLoader& System::GetAppLoader() const {
return *impl->app_loader;
}
void System::SetGPUDebugContext(std::shared_ptr<Tegra::DebugContext> context) {
impl->debug_context = std::move(context);
}
Tegra::DebugContext* System::GetGPUDebugContext() const {
return impl->debug_context.get();
}
void System::SetFilesystem(std::shared_ptr<FileSys::VfsFilesystem> vfs) {
impl->virtual_filesystem = std::move(vfs);
}
std::shared_ptr<FileSys::VfsFilesystem> System::GetFilesystem() const {
return impl->virtual_filesystem;
}
System::ResultStatus System::Init(Frontend::EmuWindow& emu_window) {
LOG_DEBUG(HW_Memory, "initialized OK");
CoreTiming::Init();
// Create a default fs if one doesn't already exist.
if (virtual_filesystem == nullptr)
virtual_filesystem = std::make_shared<FileSys::RealVfsFilesystem>();
current_process = Kernel::Process::Create("main");
cpu_barrier = std::make_shared<CpuBarrier>();
cpu_exclusive_monitor = Cpu::MakeExclusiveMonitor(cpu_cores.size());
for (size_t index = 0; index < cpu_cores.size(); ++index) {
cpu_cores[index] = std::make_shared<Cpu>(cpu_exclusive_monitor, cpu_barrier, index);
}
telemetry_session = std::make_unique<Core::TelemetrySession>();
service_manager = std::make_shared<Service::SM::ServiceManager>();
Kernel::Init();
Service::Init(service_manager, virtual_filesystem);
GDBStub::Init();
renderer = VideoCore::CreateRenderer(emu_window);
if (!renderer->Init()) {
return ResultStatus::ErrorVideoCore;
}
gpu_core = std::make_unique<Tegra::GPU>(renderer->Rasterizer());
// Create threads for CPU cores 1-3, and build thread_to_cpu map
// CPU core 0 is run on the main thread
thread_to_cpu[std::this_thread::get_id()] = cpu_cores[0];
if (Settings::values.use_multi_core) {
for (size_t index = 0; index < cpu_core_threads.size(); ++index) {
cpu_core_threads[index] =
std::make_unique<std::thread>(RunCpuCore, cpu_cores[index + 1]);
thread_to_cpu[cpu_core_threads[index]->get_id()] = cpu_cores[index + 1];
}
}
LOG_DEBUG(Core, "Initialized OK");
// Reset counters and set time origin to current frame
GetAndResetPerfStats();
perf_stats.BeginSystemFrame();
return ResultStatus::Success;
return impl->Init(emu_window);
}
void System::Shutdown() {
// Log last frame performance stats
auto perf_results = GetAndResetPerfStats();
Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
perf_results.emulation_speed * 100.0);
Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
perf_results.game_fps);
Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
perf_results.frametime * 1000.0);
// Shutdown emulation session
renderer.reset();
GDBStub::Shutdown();
Service::Shutdown();
Kernel::Shutdown();
service_manager.reset();
telemetry_session.reset();
gpu_core.reset();
// Close all CPU/threading state
cpu_barrier->NotifyEnd();
if (Settings::values.use_multi_core) {
for (auto& thread : cpu_core_threads) {
thread->join();
thread.reset();
}
}
thread_to_cpu.clear();
for (auto& cpu_core : cpu_cores) {
cpu_core.reset();
}
cpu_barrier.reset();
// Close core timing
CoreTiming::Shutdown();
// Close app loader
app_loader.reset();
LOG_DEBUG(Core, "Shutdown OK");
impl->Shutdown();
}
Service::SM::ServiceManager& System::ServiceManager() {
return *service_manager;
return *impl->service_manager;
}
const Service::SM::ServiceManager& System::ServiceManager() const {
return *service_manager;
return *impl->service_manager;
}
} // namespace Core

View File

@@ -4,40 +4,55 @@
#pragma once
#include <array>
#include <map>
#include <cstddef>
#include <memory>
#include <string>
#include <thread>
#include "common/common_types.h"
#include "core/arm/exclusive_monitor.h"
#include "core/core_cpu.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/scheduler.h"
#include "core/loader/loader.h"
#include "core/memory.h"
#include "core/perf_stats.h"
#include "core/telemetry_session.h"
#include "file_sys/vfs_real.h"
#include "hle/service/filesystem/filesystem.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/gpu.h"
namespace Core::Frontend {
class EmuWindow;
}
} // namespace Core::Frontend
namespace FileSys {
class VfsFilesystem;
} // namespace FileSys
namespace Kernel {
class KernelCore;
class Process;
class Scheduler;
} // namespace Kernel
namespace Loader {
class AppLoader;
enum class ResultStatus : u16;
} // namespace Loader
namespace Service::SM {
class ServiceManager;
}
} // namespace Service::SM
namespace Tegra {
class DebugContext;
class GPU;
} // namespace Tegra
namespace VideoCore {
class RendererBase;
}
} // namespace VideoCore
namespace Core {
class ARM_Interface;
class Cpu;
class ExclusiveMonitor;
class FrameLimiter;
class PerfStats;
class TelemetrySession;
struct PerfStatsResults;
class System {
public:
@@ -93,11 +108,7 @@ public:
* This function should only be used by GDB Stub to support breakpoints, memory updates and
* step/continue commands.
*/
void InvalidateCpuInstructionCaches() {
for (auto& cpu : cpu_cores) {
cpu->ArmInterface().ClearInstructionCache();
}
}
void InvalidateCpuInstructionCaches();
/// Shutdown the emulated system.
void Shutdown();
@@ -116,33 +127,28 @@ public:
* application).
* @returns True if the emulated system is powered on, otherwise false.
*/
bool IsPoweredOn() const {
return cpu_barrier && cpu_barrier->IsAlive();
}
bool IsPoweredOn() const;
/**
* Returns a reference to the telemetry session for this emulation session.
* @returns Reference to the telemetry session.
*/
Core::TelemetrySession& TelemetrySession() const {
return *telemetry_session;
}
Core::TelemetrySession& TelemetrySession() const;
/// Prepare the core emulation for a reschedule
void PrepareReschedule();
/// Gets and resets core performance statistics
PerfStats::Results GetAndResetPerfStats();
PerfStatsResults GetAndResetPerfStats();
/// Gets an ARM interface to the CPU core that is currently running
ARM_Interface& CurrentArmInterface() {
return CurrentCpuCore().ArmInterface();
}
ARM_Interface& CurrentArmInterface();
/// Gets the index of the currently running CPU core
size_t CurrentCoreIndex() {
return CurrentCpuCore().CoreIndex();
}
size_t CurrentCoreIndex();
/// Gets the scheduler for the CPU core that is currently running
Kernel::Scheduler& CurrentScheduler();
/// Gets an ARM interface to the CPU core with the specified index
ARM_Interface& ArmInterface(size_t core_index);
@@ -150,87 +156,64 @@ public:
/// Gets a CPU interface to the CPU core with the specified index
Cpu& CpuCore(size_t core_index);
/// Gets the exclusive monitor
ExclusiveMonitor& Monitor();
/// Gets a mutable reference to the GPU interface
Tegra::GPU& GPU() {
return *gpu_core;
}
Tegra::GPU& GPU();
/// Gets an immutable reference to the GPU interface.
const Tegra::GPU& GPU() const {
return *gpu_core;
}
const Tegra::GPU& GPU() const;
/// Gets a mutable reference to the renderer.
VideoCore::RendererBase& Renderer() {
return *renderer;
}
VideoCore::RendererBase& Renderer();
/// Gets an immutable reference to the renderer.
const VideoCore::RendererBase& Renderer() const {
return *renderer;
}
/// Gets the scheduler for the CPU core that is currently running
Kernel::Scheduler& CurrentScheduler() {
return *CurrentCpuCore().Scheduler();
}
/// Gets the exclusive monitor
ExclusiveMonitor& Monitor() {
return *cpu_exclusive_monitor;
}
const VideoCore::RendererBase& Renderer() const;
/// Gets the scheduler for the CPU core with the specified index
const std::shared_ptr<Kernel::Scheduler>& Scheduler(size_t core_index);
/// Gets the current process
Kernel::SharedPtr<Kernel::Process>& CurrentProcess() {
return current_process;
}
Kernel::SharedPtr<Kernel::Process>& CurrentProcess();
/// Provides a reference to the kernel instance.
Kernel::KernelCore& Kernel();
/// Provides a constant reference to the kernel instance.
const Kernel::KernelCore& Kernel() const;
/// Provides a reference to the internal PerfStats instance.
Core::PerfStats& GetPerfStats();
/// Provides a constant reference to the internal PerfStats instance.
const Core::PerfStats& GetPerfStats() const;
/// Provides a reference to the frame limiter;
Core::FrameLimiter& FrameLimiter();
/// Provides a constant referent to the frame limiter
const Core::FrameLimiter& FrameLimiter() const;
/// Gets the name of the current game
Loader::ResultStatus GetGameName(std::string& out) const {
if (app_loader == nullptr)
return Loader::ResultStatus::ErrorNotInitialized;
return app_loader->ReadTitle(out);
}
Loader::ResultStatus GetGameName(std::string& out) const;
PerfStats perf_stats;
FrameLimiter frame_limiter;
void SetStatus(ResultStatus new_status, const char* details);
void SetStatus(ResultStatus new_status, const char* details = nullptr) {
status = new_status;
if (details) {
status_details = details;
}
}
const std::string& GetStatusDetails() const;
const std::string& GetStatusDetails() const {
return status_details;
}
Loader::AppLoader& GetAppLoader() const {
return *app_loader;
}
Loader::AppLoader& GetAppLoader() const;
Service::SM::ServiceManager& ServiceManager();
const Service::SM::ServiceManager& ServiceManager() const;
void SetGPUDebugContext(std::shared_ptr<Tegra::DebugContext> context) {
debug_context = std::move(context);
}
void SetGPUDebugContext(std::shared_ptr<Tegra::DebugContext> context);
std::shared_ptr<Tegra::DebugContext> GetGPUDebugContext() const {
return debug_context;
}
Tegra::DebugContext* GetGPUDebugContext() const;
void SetFilesystem(FileSys::VirtualFilesystem vfs) {
virtual_filesystem = std::move(vfs);
}
void SetFilesystem(std::shared_ptr<FileSys::VfsFilesystem> vfs);
FileSys::VirtualFilesystem GetFilesystem() const {
return virtual_filesystem;
}
std::shared_ptr<FileSys::VfsFilesystem> GetFilesystem() const;
private:
System();
@@ -246,33 +229,10 @@ private:
*/
ResultStatus Init(Frontend::EmuWindow& emu_window);
/// RealVfsFilesystem instance
FileSys::VirtualFilesystem virtual_filesystem;
/// AppLoader used to load the current executing application
std::unique_ptr<Loader::AppLoader> app_loader;
std::unique_ptr<VideoCore::RendererBase> renderer;
std::unique_ptr<Tegra::GPU> gpu_core;
std::shared_ptr<Tegra::DebugContext> debug_context;
Kernel::SharedPtr<Kernel::Process> current_process;
std::shared_ptr<ExclusiveMonitor> cpu_exclusive_monitor;
std::shared_ptr<CpuBarrier> cpu_barrier;
std::array<std::shared_ptr<Cpu>, NUM_CPU_CORES> cpu_cores;
std::array<std::unique_ptr<std::thread>, NUM_CPU_CORES - 1> cpu_core_threads;
size_t active_core{}; ///< Active core, only used in single thread mode
/// Service manager
std::shared_ptr<Service::SM::ServiceManager> service_manager;
/// Telemetry session for this emulation session
std::unique_ptr<Core::TelemetrySession> telemetry_session;
struct Impl;
std::unique_ptr<Impl> impl;
static System s_instance;
ResultStatus status = ResultStatus::Success;
std::string status_details = "";
/// Map of guest threads to CPU cores
std::map<std::thread::id, std::shared_ptr<Cpu>> thread_to_cpu;
};
inline ARM_Interface& CurrentArmInterface() {

View File

@@ -5,9 +5,9 @@
#include <regex>
#include <mbedtls/sha256.h>
#include "common/assert.h"
#include "common/file_util.h"
#include "common/hex_util.h"
#include "common/logging/log.h"
#include "core/crypto/encryption_layer.h"
#include "core/file_sys/card_image.h"
#include "core/file_sys/nca_metadata.h"
#include "core/file_sys/registered_cache.h"

View File

@@ -7,6 +7,7 @@
#include "common/logging/log.h"
#include "core/core.h"
#include "core/file_sys/savedata_factory.h"
#include "core/file_sys/vfs.h"
#include "core/hle/kernel/process.h"
namespace FileSys {

View File

@@ -8,6 +8,7 @@
#include <string>
#include "common/common_types.h"
#include "common/swap.h"
#include "core/file_sys/vfs.h"
#include "core/hle/result.h"
namespace FileSys {

View File

@@ -8,6 +8,7 @@
#include <utility>
#include "common/assert.h"
#include "common/common_paths.h"
#include "common/file_util.h"
#include "common/logging/log.h"
#include "core/file_sys/vfs_real.h"
@@ -39,6 +40,7 @@ static std::string ModeFlagsToString(Mode mode) {
}
RealVfsFilesystem::RealVfsFilesystem() : VfsFilesystem(nullptr) {}
RealVfsFilesystem::~RealVfsFilesystem() = default;
std::string RealVfsFilesystem::GetName() const {
return "Real";
@@ -219,6 +221,8 @@ RealVfsFile::RealVfsFile(RealVfsFilesystem& base_, std::shared_ptr<FileUtil::IOF
parent_components(FileUtil::SliceVector(path_components, 0, path_components.size() - 1)),
perms(perms_) {}
RealVfsFile::~RealVfsFile() = default;
std::string RealVfsFile::GetName() const {
return path_components.back();
}
@@ -312,6 +316,8 @@ RealVfsDirectory::RealVfsDirectory(RealVfsFilesystem& base_, const std::string&
FileUtil::CreateDir(path);
}
RealVfsDirectory::~RealVfsDirectory() = default;
std::shared_ptr<VfsFile> RealVfsDirectory::GetFileRelative(std::string_view path) const {
const auto full_path = FileUtil::SanitizePath(this->path + DIR_SEP + std::string(path));
if (!FileUtil::Exists(full_path) || FileUtil::IsDirectory(full_path))

View File

@@ -6,15 +6,19 @@
#include <string_view>
#include <boost/container/flat_map.hpp>
#include "common/file_util.h"
#include "core/file_sys/mode.h"
#include "core/file_sys/vfs.h"
namespace FileUtil {
class IOFile;
}
namespace FileSys {
class RealVfsFilesystem : public VfsFilesystem {
public:
RealVfsFilesystem();
~RealVfsFilesystem() override;
std::string GetName() const override;
bool IsReadable() const override;
@@ -40,10 +44,9 @@ class RealVfsFile : public VfsFile {
friend class RealVfsDirectory;
friend class RealVfsFilesystem;
RealVfsFile(RealVfsFilesystem& base, std::shared_ptr<FileUtil::IOFile> backing,
const std::string& path, Mode perms = Mode::Read);
public:
~RealVfsFile() override;
std::string GetName() const override;
size_t GetSize() const override;
bool Resize(size_t new_size) override;
@@ -55,6 +58,9 @@ public:
bool Rename(std::string_view name) override;
private:
RealVfsFile(RealVfsFilesystem& base, std::shared_ptr<FileUtil::IOFile> backing,
const std::string& path, Mode perms = Mode::Read);
bool Close();
RealVfsFilesystem& base;
@@ -70,9 +76,9 @@ private:
class RealVfsDirectory : public VfsDirectory {
friend class RealVfsFilesystem;
RealVfsDirectory(RealVfsFilesystem& base, const std::string& path, Mode perms = Mode::Read);
public:
~RealVfsDirectory() override;
std::shared_ptr<VfsFile> GetFileRelative(std::string_view path) const override;
std::shared_ptr<VfsDirectory> GetDirectoryRelative(std::string_view path) const override;
std::shared_ptr<VfsFile> GetFile(std::string_view name) const override;
@@ -97,6 +103,8 @@ protected:
bool ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) override;
private:
RealVfsDirectory(RealVfsFilesystem& base, const std::string& path, Mode perms = Mode::Read);
template <typename T, typename R>
std::vector<std::shared_ptr<R>> IterateEntries() const;

View File

@@ -10,6 +10,7 @@
#include <mbedtls/md.h>
#include <mbedtls/sha256.h>
#include "common/assert.h"
#include "common/file_util.h"
#include "common/hex_util.h"
#include "common/logging/log.h"
#include "core/crypto/aes_util.h"

View File

@@ -12,6 +12,7 @@
#include <utility>
#include "common/assert.h"
#include "common/common_types.h"
#include "core/core.h"
#include "core/hle/ipc.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/client_session.h"
@@ -135,7 +136,9 @@ public:
if (context->Session()->IsDomain()) {
context->AddDomainObject(std::move(iface));
} else {
auto sessions = Kernel::ServerSession::CreateSessionPair(iface->GetServiceName());
auto& kernel = Core::System::GetInstance().Kernel();
auto sessions =
Kernel::ServerSession::CreateSessionPair(kernel, iface->GetServiceName());
auto server = std::get<Kernel::SharedPtr<Kernel::ServerSession>>(sessions);
auto client = std::get<Kernel::SharedPtr<Kernel::ClientSession>>(sessions);
iface->ClientConnected(server);

View File

@@ -8,9 +8,11 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/result.h"
#include "core/memory.h"

View File

@@ -14,7 +14,7 @@
namespace Kernel {
ClientPort::ClientPort() = default;
ClientPort::ClientPort(KernelCore& kernel) : Object{kernel} {}
ClientPort::~ClientPort() = default;
ResultVal<SharedPtr<ClientSession>> ClientPort::Connect() {
@@ -27,7 +27,7 @@ ResultVal<SharedPtr<ClientSession>> ClientPort::Connect() {
active_sessions++;
// Create a new session pair, let the created sessions inherit the parent port's HLE handler.
auto sessions = ServerSession::CreateSessionPair(server_port->GetName(), this);
auto sessions = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this);
if (server_port->hle_handler)
server_port->hle_handler->ClientConnected(std::get<SharedPtr<ServerSession>>(sessions));

View File

@@ -11,8 +11,9 @@
namespace Kernel {
class ServerPort;
class ClientSession;
class KernelCore;
class ServerPort;
class ClientPort final : public Object {
public:
@@ -44,7 +45,7 @@ public:
void ConnectionClosed();
private:
ClientPort();
explicit ClientPort(KernelCore& kernel);
~ClientPort() override;
SharedPtr<ServerPort> server_port; ///< ServerPort associated with this client port.

View File

@@ -11,7 +11,7 @@
namespace Kernel {
ClientSession::ClientSession() = default;
ClientSession::ClientSession(KernelCore& kernel) : Object{kernel} {}
ClientSession::~ClientSession() {
// This destructor will be called automatically when the last ClientSession handle is closed by
// the emulated application.

View File

@@ -12,8 +12,9 @@
namespace Kernel {
class ServerSession;
class KernelCore;
class Session;
class ServerSession;
class Thread;
class ClientSession final : public Object {
@@ -41,7 +42,7 @@ public:
std::shared_ptr<Session> parent;
private:
ClientSession();
explicit ClientSession(KernelCore& kernel);
~ClientSession() override;
};

View File

@@ -10,11 +10,11 @@
namespace Kernel {
Event::Event() {}
Event::~Event() {}
Event::Event(KernelCore& kernel) : WaitObject{kernel} {}
Event::~Event() = default;
SharedPtr<Event> Event::Create(ResetType reset_type, std::string name) {
SharedPtr<Event> evt(new Event);
SharedPtr<Event> Event::Create(KernelCore& kernel, ResetType reset_type, std::string name) {
SharedPtr<Event> evt(new Event(kernel));
evt->signaled = false;
evt->reset_type = reset_type;

View File

@@ -10,14 +10,18 @@
namespace Kernel {
class KernelCore;
class Event final : public WaitObject {
public:
/**
* Creates an event
* @param kernel The kernel instance to create this event under.
* @param reset_type ResetType describing how to create event
* @param name Optional name of event
*/
static SharedPtr<Event> Create(ResetType reset_type, std::string name = "Unknown");
static SharedPtr<Event> Create(KernelCore& kernel, ResetType reset_type,
std::string name = "Unknown");
std::string GetTypeName() const override {
return "Event";
@@ -44,7 +48,7 @@ public:
void Clear();
private:
Event();
explicit Event(KernelCore& kernel);
~Event() override;
ResetType reset_type; ///< Current ResetType

View File

@@ -13,8 +13,6 @@
namespace Kernel {
HandleTable g_handle_table;
HandleTable::HandleTable() {
next_generation = 1;
Clear();

View File

@@ -121,6 +121,4 @@ private:
u16 next_free_slot;
};
extern HandleTable g_handle_table;
} // namespace Kernel

View File

@@ -13,10 +13,12 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/event.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/hle_ipc.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/server_session.h"
@@ -51,7 +53,9 @@ SharedPtr<Event> HLERequestContext::SleepClientThread(SharedPtr<Thread> thread,
if (!event) {
// Create event if not provided
event = Kernel::Event::Create(Kernel::ResetType::OneShot, "HLE Pause Event: " + reason);
auto& kernel = Core::System::GetInstance().Kernel();
event =
Kernel::Event::Create(kernel, Kernel::ResetType::OneShot, "HLE Pause Event: " + reason);
}
event->Clear();
@@ -90,12 +94,14 @@ void HLERequestContext::ParseCommandBuffer(u32_le* src_cmdbuf, bool incoming) {
rp.Skip(2, false);
}
if (incoming) {
auto& handle_table = Core::System::GetInstance().Kernel().HandleTable();
// Populate the object lists with the data in the IPC request.
for (u32 handle = 0; handle < handle_descriptor_header->num_handles_to_copy; ++handle) {
copy_objects.push_back(Kernel::g_handle_table.GetGeneric(rp.Pop<Handle>()));
copy_objects.push_back(handle_table.GetGeneric(rp.Pop<Handle>()));
}
for (u32 handle = 0; handle < handle_descriptor_header->num_handles_to_move; ++handle) {
move_objects.push_back(Kernel::g_handle_table.GetGeneric(rp.Pop<Handle>()));
move_objects.push_back(handle_table.GetGeneric(rp.Pop<Handle>()));
}
} else {
// For responses we just ignore the handles, they're empty and will be populated when
@@ -230,17 +236,19 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(const Thread& thread)
ASSERT(copy_objects.size() == handle_descriptor_header->num_handles_to_copy);
ASSERT(move_objects.size() == handle_descriptor_header->num_handles_to_move);
auto& handle_table = Core::System::GetInstance().Kernel().HandleTable();
// We don't make a distinction between copy and move handles when translating since HLE
// services don't deal with handles directly. However, the guest applications might check
// for specific values in each of these descriptors.
for (auto& object : copy_objects) {
ASSERT(object != nullptr);
dst_cmdbuf[current_offset++] = Kernel::g_handle_table.Create(object).Unwrap();
dst_cmdbuf[current_offset++] = handle_table.Create(object).Unwrap();
}
for (auto& object : move_objects) {
ASSERT(object != nullptr);
dst_cmdbuf[current_offset++] = Kernel::g_handle_table.Create(object).Unwrap();
dst_cmdbuf[current_offset++] = handle_table.Create(object).Unwrap();
}
}

View File

@@ -2,38 +2,315 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <atomic>
#include <memory>
#include <mutex>
#include <utility>
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/kernel/timer.h"
#include "core/hle/lock.h"
#include "core/hle/result.h"
namespace Kernel {
std::atomic<u32> Object::next_object_id{0};
/**
* Callback that will wake up the thread it was scheduled for
* @param thread_handle The handle of the thread that's been awoken
* @param cycles_late The number of CPU cycles that have passed since the desired wakeup time
*/
static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_late) {
const auto proper_handle = static_cast<Handle>(thread_handle);
auto& system = Core::System::GetInstance();
/// Initialize the kernel
void Init() {
Kernel::ResourceLimitsInit();
Kernel::ThreadingInit();
Kernel::TimersInit();
// Lock the global kernel mutex when we enter the kernel HLE.
std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
Object::next_object_id = 0;
// TODO(Subv): Start the process ids from 10 for now, as lower PIDs are
// reserved for low-level services
Process::next_process_id = 10;
SharedPtr<Thread> thread =
system.Kernel().RetrieveThreadFromWakeupCallbackHandleTable(proper_handle);
if (thread == nullptr) {
LOG_CRITICAL(Kernel, "Callback fired for invalid thread {:08X}", proper_handle);
return;
}
bool resume = true;
if (thread->status == ThreadStatus::WaitSynchAny ||
thread->status == ThreadStatus::WaitSynchAll ||
thread->status == ThreadStatus::WaitHLEEvent) {
// Remove the thread from each of its waiting objects' waitlists
for (auto& object : thread->wait_objects) {
object->RemoveWaitingThread(thread.get());
}
thread->wait_objects.clear();
// Invoke the wakeup callback before clearing the wait objects
if (thread->wakeup_callback) {
resume = thread->wakeup_callback(ThreadWakeupReason::Timeout, thread, nullptr, 0);
}
}
if (thread->mutex_wait_address != 0 || thread->condvar_wait_address != 0 ||
thread->wait_handle) {
ASSERT(thread->status == ThreadStatus::WaitMutex);
thread->mutex_wait_address = 0;
thread->condvar_wait_address = 0;
thread->wait_handle = 0;
auto lock_owner = thread->lock_owner;
// Threads waking up by timeout from WaitProcessWideKey do not perform priority inheritance
// and don't have a lock owner unless SignalProcessWideKey was called first and the thread
// wasn't awakened due to the mutex already being acquired.
if (lock_owner) {
lock_owner->RemoveMutexWaiter(thread);
}
}
if (thread->arb_wait_address != 0) {
ASSERT(thread->status == ThreadStatus::WaitArb);
thread->arb_wait_address = 0;
}
if (resume) {
thread->ResumeFromWait();
}
}
/// Shutdown the kernel
void Shutdown() {
// Free all kernel objects
g_handle_table.Clear();
/// The timer callback event, called when a timer is fired
static void TimerCallback(u64 timer_handle, int cycles_late) {
const auto proper_handle = static_cast<Handle>(timer_handle);
auto& system = Core::System::GetInstance();
SharedPtr<Timer> timer = system.Kernel().RetrieveTimerFromCallbackHandleTable(proper_handle);
Kernel::ThreadingShutdown();
if (timer == nullptr) {
LOG_CRITICAL(Kernel, "Callback fired for invalid timer {:016X}", timer_handle);
return;
}
Kernel::TimersShutdown();
Kernel::ResourceLimitsShutdown();
timer->Signal(cycles_late);
}
struct KernelCore::Impl {
void Initialize(KernelCore& kernel) {
Shutdown();
InitializeResourceLimits(kernel);
InitializeThreads();
InitializeTimers();
}
void Shutdown() {
next_object_id = 0;
next_process_id = 10;
next_thread_id = 1;
process_list.clear();
handle_table.Clear();
resource_limits.fill(nullptr);
thread_wakeup_callback_handle_table.Clear();
thread_wakeup_event_type = nullptr;
timer_callback_handle_table.Clear();
timer_callback_event_type = nullptr;
named_ports.clear();
}
void InitializeResourceLimits(KernelCore& kernel) {
// Create the four resource limits that the system uses
// Create the APPLICATION resource limit
SharedPtr<ResourceLimit> resource_limit = ResourceLimit::Create(kernel, "Applications");
resource_limit->max_priority = 0x18;
resource_limit->max_commit = 0x4000000;
resource_limit->max_threads = 0x20;
resource_limit->max_events = 0x20;
resource_limit->max_mutexes = 0x20;
resource_limit->max_semaphores = 0x8;
resource_limit->max_timers = 0x8;
resource_limit->max_shared_mems = 0x10;
resource_limit->max_address_arbiters = 0x2;
resource_limit->max_cpu_time = 0x1E;
resource_limits[static_cast<u8>(ResourceLimitCategory::APPLICATION)] = resource_limit;
// Create the SYS_APPLET resource limit
resource_limit = ResourceLimit::Create(kernel, "System Applets");
resource_limit->max_priority = 0x4;
resource_limit->max_commit = 0x5E00000;
resource_limit->max_threads = 0x1D;
resource_limit->max_events = 0xB;
resource_limit->max_mutexes = 0x8;
resource_limit->max_semaphores = 0x4;
resource_limit->max_timers = 0x4;
resource_limit->max_shared_mems = 0x8;
resource_limit->max_address_arbiters = 0x3;
resource_limit->max_cpu_time = 0x2710;
resource_limits[static_cast<u8>(ResourceLimitCategory::SYS_APPLET)] = resource_limit;
// Create the LIB_APPLET resource limit
resource_limit = ResourceLimit::Create(kernel, "Library Applets");
resource_limit->max_priority = 0x4;
resource_limit->max_commit = 0x600000;
resource_limit->max_threads = 0xE;
resource_limit->max_events = 0x8;
resource_limit->max_mutexes = 0x8;
resource_limit->max_semaphores = 0x4;
resource_limit->max_timers = 0x4;
resource_limit->max_shared_mems = 0x8;
resource_limit->max_address_arbiters = 0x1;
resource_limit->max_cpu_time = 0x2710;
resource_limits[static_cast<u8>(ResourceLimitCategory::LIB_APPLET)] = resource_limit;
// Create the OTHER resource limit
resource_limit = ResourceLimit::Create(kernel, "Others");
resource_limit->max_priority = 0x4;
resource_limit->max_commit = 0x2180000;
resource_limit->max_threads = 0xE1;
resource_limit->max_events = 0x108;
resource_limit->max_mutexes = 0x25;
resource_limit->max_semaphores = 0x43;
resource_limit->max_timers = 0x2C;
resource_limit->max_shared_mems = 0x1F;
resource_limit->max_address_arbiters = 0x2D;
resource_limit->max_cpu_time = 0x3E8;
resource_limits[static_cast<u8>(ResourceLimitCategory::OTHER)] = resource_limit;
}
void InitializeThreads() {
thread_wakeup_event_type =
CoreTiming::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
}
void InitializeTimers() {
timer_callback_handle_table.Clear();
timer_callback_event_type = CoreTiming::RegisterEvent("TimerCallback", TimerCallback);
}
std::atomic<u32> next_object_id{0};
// TODO(Subv): Start the process ids from 10 for now, as lower PIDs are
// reserved for low-level services
std::atomic<u32> next_process_id{10};
std::atomic<u32> next_thread_id{1};
// Lists all processes that exist in the current session.
std::vector<SharedPtr<Process>> process_list;
Kernel::HandleTable handle_table;
std::array<SharedPtr<ResourceLimit>, 4> resource_limits;
/// The event type of the generic timer callback event
CoreTiming::EventType* timer_callback_event_type = nullptr;
// TODO(yuriks): This can be removed if Timer objects are explicitly pooled in the future,
// allowing us to simply use a pool index or similar.
Kernel::HandleTable timer_callback_handle_table;
CoreTiming::EventType* thread_wakeup_event_type = nullptr;
// TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future,
// allowing us to simply use a pool index or similar.
Kernel::HandleTable thread_wakeup_callback_handle_table;
/// Map of named ports managed by the kernel, which can be retrieved using
/// the ConnectToPort SVC.
NamedPortTable named_ports;
};
KernelCore::KernelCore() : impl{std::make_unique<Impl>()} {}
KernelCore::~KernelCore() {
Shutdown();
}
void KernelCore::Initialize() {
impl->Initialize(*this);
}
void KernelCore::Shutdown() {
impl->Shutdown();
}
Kernel::HandleTable& KernelCore::HandleTable() {
return impl->handle_table;
}
const Kernel::HandleTable& KernelCore::HandleTable() const {
return impl->handle_table;
}
SharedPtr<ResourceLimit> KernelCore::ResourceLimitForCategory(
ResourceLimitCategory category) const {
return impl->resource_limits.at(static_cast<std::size_t>(category));
}
SharedPtr<Thread> KernelCore::RetrieveThreadFromWakeupCallbackHandleTable(Handle handle) const {
return impl->thread_wakeup_callback_handle_table.Get<Thread>(handle);
}
SharedPtr<Timer> KernelCore::RetrieveTimerFromCallbackHandleTable(Handle handle) const {
return impl->timer_callback_handle_table.Get<Timer>(handle);
}
void KernelCore::AppendNewProcess(SharedPtr<Process> process) {
impl->process_list.push_back(std::move(process));
}
void KernelCore::AddNamedPort(std::string name, SharedPtr<ClientPort> port) {
impl->named_ports.emplace(std::move(name), std::move(port));
}
KernelCore::NamedPortTable::iterator KernelCore::FindNamedPort(const std::string& name) {
return impl->named_ports.find(name);
}
KernelCore::NamedPortTable::const_iterator KernelCore::FindNamedPort(
const std::string& name) const {
return impl->named_ports.find(name);
}
bool KernelCore::IsValidNamedPort(NamedPortTable::const_iterator port) const {
return port != impl->named_ports.cend();
}
u32 KernelCore::CreateNewObjectID() {
return impl->next_object_id++;
}
u32 KernelCore::CreateNewThreadID() {
return impl->next_thread_id++;
}
u32 KernelCore::CreateNewProcessID() {
return impl->next_process_id++;
}
ResultVal<Handle> KernelCore::CreateTimerCallbackHandle(const SharedPtr<Timer>& timer) {
return impl->timer_callback_handle_table.Create(timer);
}
CoreTiming::EventType* KernelCore::ThreadWakeupCallbackEventType() const {
return impl->thread_wakeup_event_type;
}
CoreTiming::EventType* KernelCore::TimerCallbackEventType() const {
return impl->timer_callback_event_type;
}
Kernel::HandleTable& KernelCore::ThreadWakeupCallbackHandleTable() {
return impl->thread_wakeup_callback_handle_table;
}
const Kernel::HandleTable& KernelCore::ThreadWakeupCallbackHandleTable() const {
return impl->thread_wakeup_callback_handle_table;
}
} // namespace Kernel
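To make the lifecycle above concrete, here is a minimal sketch (not part of the diff; the wrapper function name is illustrative) of how a caller drives the new pimpl-backed kernel. Core::System::GetInstance().Kernel() is the accessor used throughout the rest of this changeset:

#include "core/core.h"
#include "core/hle/kernel/kernel.h"

void ResetEmulatedKernel() {
    auto& kernel = Core::System::GetInstance().Kernel();
    // Impl::Initialize runs Shutdown() first, then recreates the resource limits and
    // re-registers the thread-wakeup and timer callback events.
    kernel.Initialize();
    // ... run the emulated session ...
    // Impl::Shutdown resets the ID counters and clears the process list,
    // handle tables, and named port map.
    kernel.Shutdown();
}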

View File

@@ -4,14 +4,111 @@
#pragma once
#include "common/common_types.h"
#include <string>
#include <unordered_map>
#include "core/hle/kernel/object.h"
template <typename T>
class ResultVal;
namespace CoreTiming {
struct EventType;
}
namespace Kernel {
/// Initialize the kernel with the specified system mode.
void Init();
class ClientPort;
class HandleTable;
class Process;
class ResourceLimit;
class Thread;
class Timer;
/// Shutdown the kernel
void Shutdown();
enum class ResourceLimitCategory : u8;
/// Represents a single instance of the kernel.
class KernelCore {
private:
using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>;
public:
KernelCore();
~KernelCore();
KernelCore(const KernelCore&) = delete;
KernelCore& operator=(const KernelCore&) = delete;
KernelCore(KernelCore&&) = delete;
KernelCore& operator=(KernelCore&&) = delete;
/// Resets the kernel to a clean slate for use.
void Initialize();
/// Clears all resources in use by the kernel instance.
void Shutdown();
/// Provides a reference to the handle table.
Kernel::HandleTable& HandleTable();
/// Provides a const reference to the handle table.
const Kernel::HandleTable& HandleTable() const;
/// Retrieves a shared pointer to a ResourceLimit identified by the given category.
SharedPtr<ResourceLimit> ResourceLimitForCategory(ResourceLimitCategory category) const;
/// Retrieves a shared pointer to a Thread instance within the thread wakeup handle table.
SharedPtr<Thread> RetrieveThreadFromWakeupCallbackHandleTable(Handle handle) const;
/// Retrieves a shared pointer to a Timer instance within the timer callback handle table.
SharedPtr<Timer> RetrieveTimerFromCallbackHandleTable(Handle handle) const;
/// Adds the given shared pointer to an internal list of active processes.
void AppendNewProcess(SharedPtr<Process> process);
/// Adds a port to the named port table
void AddNamedPort(std::string name, SharedPtr<ClientPort> port);
/// Finds a port within the named port table with the given name.
NamedPortTable::iterator FindNamedPort(const std::string& name);
/// Finds a port within the named port table with the given name.
NamedPortTable::const_iterator FindNamedPort(const std::string& name) const;
/// Determines whether or not the given port is a valid named port.
bool IsValidNamedPort(NamedPortTable::const_iterator port) const;
private:
friend class Object;
friend class Process;
friend class Thread;
friend class Timer;
/// Creates a new object ID, incrementing the internal object ID counter.
u32 CreateNewObjectID();
/// Creates a new process ID, incrementing the internal process ID counter.

u32 CreateNewProcessID();
/// Creates a new thread ID, incrementing the internal thread ID counter.
u32 CreateNewThreadID();
/// Creates a timer callback handle for the given timer.
ResultVal<Handle> CreateTimerCallbackHandle(const SharedPtr<Timer>& timer);
/// Retrieves the event type used for thread wakeup callbacks.
CoreTiming::EventType* ThreadWakeupCallbackEventType() const;
/// Retrieves the event type used for timer callbacks.
CoreTiming::EventType* TimerCallbackEventType() const;
/// Provides a reference to the thread wakeup callback handle table.
Kernel::HandleTable& ThreadWakeupCallbackHandleTable();
/// Provides a const reference to the thread wakeup callback handle table.
const Kernel::HandleTable& ThreadWakeupCallbackHandleTable() const;
struct Impl;
std::unique_ptr<Impl> impl;
};
} // namespace Kernel
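As a hedged usage sketch of the named-port API declared above (the port name and surrounding variables are illustrative; real registration happens in the service layer, and the lookup side appears in ConnectToNamedPort further down):

auto& kernel = Core::System::GetInstance().Kernel();
kernel.AddNamedPort("sm:", std::move(client_port)); // registration
const auto it = kernel.FindNamedPort("sm:");        // lookup
if (kernel.IsValidNamedPort(it)) {
    Kernel::SharedPtr<Kernel::ClientPort> port = it->second;
    // connect to the port, create a session pair, etc.
}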

View File

@@ -58,15 +58,15 @@ static void TransferMutexOwnership(VAddr mutex_addr, SharedPtr<Thread> current_t
}
}
ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
ResultCode Mutex::TryAcquire(HandleTable& handle_table, VAddr address, Handle holding_thread_handle,
Handle requesting_thread_handle) {
// The mutex address must be 4-byte aligned
if ((address % sizeof(u32)) != 0) {
return ResultCode(ErrorModule::Kernel, ErrCodes::InvalidAddress);
}
SharedPtr<Thread> holding_thread = g_handle_table.Get<Thread>(holding_thread_handle);
SharedPtr<Thread> requesting_thread = g_handle_table.Get<Thread>(requesting_thread_handle);
SharedPtr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle);
SharedPtr<Thread> requesting_thread = handle_table.Get<Thread>(requesting_thread_handle);
// TODO(Subv): It is currently unknown if it is possible to lock a mutex in behalf of another
// thread.

View File

@@ -11,6 +11,7 @@ union ResultCode;
namespace Kernel {
class HandleTable;
class Thread;
class Mutex final {
@@ -21,8 +22,8 @@ public:
static constexpr u32 MutexOwnerMask = 0xBFFFFFFF;
/// Attempts to acquire a mutex at the specified address.
static ResultCode TryAcquire(VAddr address, Handle holding_thread_handle,
Handle requesting_thread_handle);
static ResultCode TryAcquire(HandleTable& handle_table, VAddr address,
Handle holding_thread_handle, Handle requesting_thread_handle);
/// Releases the mutex at the specified address.
static ResultCode Release(VAddr address);
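Since callers must now supply the handle table explicitly, a minimal sketch of the updated call site (this mirrors the ArbitrateLock change in svc.cpp further down; the variable names are illustrative):

auto& handle_table = Core::System::GetInstance().Kernel().HandleTable();
const ResultCode result = Kernel::Mutex::TryAcquire(handle_table, mutex_addr,
                                                    holding_thread_handle,
                                                    requesting_thread_handle);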

View File

@@ -3,10 +3,12 @@
// Refer to the license.txt file included.
#include "common/assert.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/object.h"
namespace Kernel {
Object::Object(KernelCore& kernel) : kernel{kernel}, object_id{kernel.CreateNewObjectID()} {}
Object::~Object() = default;
bool Object::IsWaitable() const {

View File

@@ -14,6 +14,8 @@
namespace Kernel {
class KernelCore;
using Handle = u32;
enum class HandleType : u32 {
@@ -40,6 +42,7 @@ enum class ResetType {
class Object : NonCopyable {
public:
explicit Object(KernelCore& kernel);
virtual ~Object();
/// Returns a unique identifier for the object. For debugging purposes only.
@@ -61,15 +64,16 @@ public:
*/
bool IsWaitable() const;
public:
static std::atomic<u32> next_object_id;
protected:
/// The kernel instance this object was created under.
KernelCore& kernel;
private:
friend void intrusive_ptr_add_ref(Object*);
friend void intrusive_ptr_release(Object*);
std::atomic<u32> ref_count{0};
std::atomic<u32> object_id{next_object_id++};
std::atomic<u32> object_id{0};
};
// Special functions used by boost::instrusive_ptr to do automatic ref-counting

View File

@@ -8,6 +8,7 @@
#include "common/common_funcs.h"
#include "common/logging/log.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/hle/kernel/thread.h"
@@ -16,30 +17,26 @@
namespace Kernel {
// Lists all processes that exist in the current session.
static std::vector<SharedPtr<Process>> process_list;
SharedPtr<CodeSet> CodeSet::Create(std::string name) {
SharedPtr<CodeSet> codeset(new CodeSet);
SharedPtr<CodeSet> CodeSet::Create(KernelCore& kernel, std::string name) {
SharedPtr<CodeSet> codeset(new CodeSet(kernel));
codeset->name = std::move(name);
return codeset;
}
CodeSet::CodeSet() {}
CodeSet::~CodeSet() {}
CodeSet::CodeSet(KernelCore& kernel) : Object{kernel} {}
CodeSet::~CodeSet() = default;
u32 Process::next_process_id;
SharedPtr<Process> Process::Create(std::string&& name) {
SharedPtr<Process> process(new Process);
SharedPtr<Process> Process::Create(KernelCore& kernel, std::string&& name) {
SharedPtr<Process> process(new Process(kernel));
process->name = std::move(name);
process->flags.raw = 0;
process->flags.memory_region.Assign(MemoryRegion::APPLICATION);
process->status = ProcessStatus::Created;
process->program_id = 0;
process->process_id = kernel.CreateNewProcessID();
process_list.push_back(process);
kernel.AppendNewProcess(process);
return process;
}
@@ -128,7 +125,7 @@ void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) {
vm_manager.LogLayout();
status = ProcessStatus::Running;
Kernel::SetupMainThread(entry_point, main_thread_priority, this);
Kernel::SetupMainThread(kernel, entry_point, main_thread_priority, this);
}
void Process::LoadModule(SharedPtr<CodeSet> module_, VAddr base_addr) {
@@ -231,22 +228,7 @@ ResultCode Process::UnmapMemory(VAddr dst_addr, VAddr /*src_addr*/, u64 size) {
return vm_manager.UnmapRange(dst_addr, size);
}
Kernel::Process::Process() {}
Kernel::Process::Process(KernelCore& kernel) : Object{kernel} {}
Kernel::Process::~Process() {}
void ClearProcessList() {
process_list.clear();
}
SharedPtr<Process> GetProcessById(u32 process_id) {
auto itr = std::find_if(
process_list.begin(), process_list.end(),
[&](const SharedPtr<Process>& process) { return process->process_id == process_id; });
if (itr == process_list.end())
return nullptr;
return *itr;
}
} // namespace Kernel

View File

@@ -19,6 +19,8 @@
namespace Kernel {
class KernelCore;
struct AddressMapping {
// Address and size must be page-aligned
VAddr address;
@@ -62,7 +64,7 @@ struct CodeSet final : public Object {
u32 size = 0;
};
static SharedPtr<CodeSet> Create(std::string name);
static SharedPtr<CodeSet> Create(KernelCore& kernel, std::string name);
std::string GetTypeName() const override {
return "CodeSet";
@@ -109,13 +111,13 @@ struct CodeSet final : public Object {
std::string name;
private:
CodeSet();
explicit CodeSet(KernelCore& kernel);
~CodeSet() override;
};
class Process final : public Object {
public:
static SharedPtr<Process> Create(std::string&& name);
static SharedPtr<Process> Create(KernelCore& kernel, std::string&& name);
std::string GetTypeName() const override {
return "Process";
@@ -129,8 +131,6 @@ public:
return HANDLE_TYPE;
}
static u32 next_process_id;
/// Title ID corresponding to the process
u64 program_id;
@@ -157,8 +157,8 @@ public:
/// Current status of the process
ProcessStatus status;
/// The id of this process
u32 process_id = next_process_id++;
/// The ID of this process
u32 process_id = 0;
/**
* Parses a list of kernel capability descriptors (as found in the ExHeader) and applies them
@@ -206,13 +206,8 @@ public:
ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, u64 size);
private:
Process();
explicit Process(KernelCore& kernel);
~Process() override;
};
void ClearProcessList();
/// Retrieves a process from the current list of processes.
SharedPtr<Process> GetProcessById(u32 process_id);
} // namespace Kernel
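With the free functions gone, process creation now threads the kernel instance through explicitly; a brief sketch, where the entry point, priority, and stack size would come from the loader:

auto& kernel = Core::System::GetInstance().Kernel();
auto process = Kernel::Process::Create(kernel, "main");
process->Run(entry_point, main_thread_priority, stack_size);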

View File

@@ -9,31 +9,16 @@
namespace Kernel {
static SharedPtr<ResourceLimit> resource_limits[4];
ResourceLimit::ResourceLimit(KernelCore& kernel) : Object{kernel} {}
ResourceLimit::~ResourceLimit() = default;
ResourceLimit::ResourceLimit() {}
ResourceLimit::~ResourceLimit() {}
SharedPtr<ResourceLimit> ResourceLimit::Create(std::string name) {
SharedPtr<ResourceLimit> resource_limit(new ResourceLimit);
SharedPtr<ResourceLimit> ResourceLimit::Create(KernelCore& kernel, std::string name) {
SharedPtr<ResourceLimit> resource_limit(new ResourceLimit(kernel));
resource_limit->name = std::move(name);
return resource_limit;
}
SharedPtr<ResourceLimit> ResourceLimit::GetForCategory(ResourceLimitCategory category) {
switch (category) {
case ResourceLimitCategory::APPLICATION:
case ResourceLimitCategory::SYS_APPLET:
case ResourceLimitCategory::LIB_APPLET:
case ResourceLimitCategory::OTHER:
return resource_limits[static_cast<u8>(category)];
default:
LOG_CRITICAL(Kernel, "Unknown resource limit category");
UNREACHABLE();
}
}
s32 ResourceLimit::GetCurrentResourceValue(ResourceType resource) const {
switch (resource) {
case ResourceType::Commit:
@@ -89,66 +74,4 @@ u32 ResourceLimit::GetMaxResourceValue(ResourceType resource) const {
return 0;
}
}
void ResourceLimitsInit() {
// Create the four resource limits that the system uses
// Create the APPLICATION resource limit
SharedPtr<ResourceLimit> resource_limit = ResourceLimit::Create("Applications");
resource_limit->max_priority = 0x18;
resource_limit->max_commit = 0x4000000;
resource_limit->max_threads = 0x20;
resource_limit->max_events = 0x20;
resource_limit->max_mutexes = 0x20;
resource_limit->max_semaphores = 0x8;
resource_limit->max_timers = 0x8;
resource_limit->max_shared_mems = 0x10;
resource_limit->max_address_arbiters = 0x2;
resource_limit->max_cpu_time = 0x1E;
resource_limits[static_cast<u8>(ResourceLimitCategory::APPLICATION)] = resource_limit;
// Create the SYS_APPLET resource limit
resource_limit = ResourceLimit::Create("System Applets");
resource_limit->max_priority = 0x4;
resource_limit->max_commit = 0x5E00000;
resource_limit->max_threads = 0x1D;
resource_limit->max_events = 0xB;
resource_limit->max_mutexes = 0x8;
resource_limit->max_semaphores = 0x4;
resource_limit->max_timers = 0x4;
resource_limit->max_shared_mems = 0x8;
resource_limit->max_address_arbiters = 0x3;
resource_limit->max_cpu_time = 0x2710;
resource_limits[static_cast<u8>(ResourceLimitCategory::SYS_APPLET)] = resource_limit;
// Create the LIB_APPLET resource limit
resource_limit = ResourceLimit::Create("Library Applets");
resource_limit->max_priority = 0x4;
resource_limit->max_commit = 0x600000;
resource_limit->max_threads = 0xE;
resource_limit->max_events = 0x8;
resource_limit->max_mutexes = 0x8;
resource_limit->max_semaphores = 0x4;
resource_limit->max_timers = 0x4;
resource_limit->max_shared_mems = 0x8;
resource_limit->max_address_arbiters = 0x1;
resource_limit->max_cpu_time = 0x2710;
resource_limits[static_cast<u8>(ResourceLimitCategory::LIB_APPLET)] = resource_limit;
// Create the OTHER resource limit
resource_limit = ResourceLimit::Create("Others");
resource_limit->max_priority = 0x4;
resource_limit->max_commit = 0x2180000;
resource_limit->max_threads = 0xE1;
resource_limit->max_events = 0x108;
resource_limit->max_mutexes = 0x25;
resource_limit->max_semaphores = 0x43;
resource_limit->max_timers = 0x2C;
resource_limit->max_shared_mems = 0x1F;
resource_limit->max_address_arbiters = 0x2D;
resource_limit->max_cpu_time = 0x3E8;
resource_limits[static_cast<u8>(ResourceLimitCategory::OTHER)] = resource_limit;
}
void ResourceLimitsShutdown() {}
} // namespace Kernel

View File

@@ -9,6 +9,8 @@
namespace Kernel {
class KernelCore;
enum class ResourceLimitCategory : u8 {
APPLICATION = 0,
SYS_APPLET = 1,
@@ -34,14 +36,7 @@ public:
/**
* Creates a resource limit object.
*/
static SharedPtr<ResourceLimit> Create(std::string name = "Unknown");
/**
* Retrieves the resource limit associated with the specified resource limit category.
* @param category The resource limit category
* @returns The resource limit associated with the category
*/
static SharedPtr<ResourceLimit> GetForCategory(ResourceLimitCategory category);
static SharedPtr<ResourceLimit> Create(KernelCore& kernel, std::string name = "Unknown");
std::string GetTypeName() const override {
return "ResourceLimit";
@@ -113,14 +108,8 @@ public:
s32 current_cpu_time = 0;
private:
ResourceLimit();
explicit ResourceLimit(KernelCore& kernel);
~ResourceLimit() override;
};
/// Initializes the resource limits
void ResourceLimitsInit();
// Destroys the resource limits
void ResourceLimitsShutdown();
} // namespace Kernel
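With GetForCategory removed, consumers query the kernel instance instead; a minimal sketch:

auto& kernel = Core::System::GetInstance().Kernel();
auto limit = kernel.ResourceLimitForCategory(Kernel::ResourceLimitCategory::APPLICATION);
const s32 current_commit = limit->GetCurrentResourceValue(Kernel::ResourceType::Commit);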

View File

@@ -13,8 +13,8 @@
namespace Kernel {
ServerPort::ServerPort() {}
ServerPort::~ServerPort() {}
ServerPort::ServerPort(KernelCore& kernel) : WaitObject{kernel} {}
ServerPort::~ServerPort() = default;
ResultVal<SharedPtr<ServerSession>> ServerPort::Accept() {
if (pending_sessions.empty()) {
@@ -36,10 +36,10 @@ void ServerPort::Acquire(Thread* thread) {
}
std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> ServerPort::CreatePortPair(
u32 max_sessions, std::string name) {
KernelCore& kernel, u32 max_sessions, std::string name) {
SharedPtr<ServerPort> server_port(new ServerPort);
SharedPtr<ClientPort> client_port(new ClientPort);
SharedPtr<ServerPort> server_port(new ServerPort(kernel));
SharedPtr<ClientPort> client_port(new ClientPort(kernel));
server_port->name = name + "_Server";
client_port->name = name + "_Client";

View File

@@ -15,6 +15,7 @@
namespace Kernel {
class ClientPort;
class KernelCore;
class ServerSession;
class SessionRequestHandler;
@@ -23,12 +24,13 @@ public:
/**
* Creates a pair of ServerPort and an associated ClientPort.
*
* @param kernel The kernel instance to create the port pair under.
* @param max_sessions Maximum number of sessions to the port
* @param name Optional name of the ports
* @return The created port tuple
*/
static std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> CreatePortPair(
u32 max_sessions, std::string name = "UnknownPort");
KernelCore& kernel, u32 max_sessions, std::string name = "UnknownPort");
std::string GetTypeName() const override {
return "ServerPort";
@@ -69,7 +71,7 @@ public:
void Acquire(Thread* thread) override;
private:
ServerPort();
explicit ServerPort(KernelCore& kernel);
~ServerPort() override;
};
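A hedged sketch of creating a port pair under the new signature (the session limit and name are illustrative):

auto& kernel = Core::System::GetInstance().Kernel();
Kernel::SharedPtr<Kernel::ServerPort> server_port;
Kernel::SharedPtr<Kernel::ClientPort> client_port;
std::tie(server_port, client_port) =
    Kernel::ServerPort::CreatePortPair(kernel, /*max_sessions=*/1, "Example");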

View File

@@ -13,6 +13,7 @@
#include "core/hle/kernel/client_session.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/hle_ipc.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/server_session.h"
#include "core/hle/kernel/session.h"
@@ -20,7 +21,7 @@
namespace Kernel {
ServerSession::ServerSession() = default;
ServerSession::ServerSession(KernelCore& kernel) : WaitObject{kernel} {}
ServerSession::~ServerSession() {
// This destructor will be called automatically when the last ServerSession handle is closed by
// the emulated application.
@@ -35,8 +36,8 @@ ServerSession::~ServerSession() {
parent->server = nullptr;
}
ResultVal<SharedPtr<ServerSession>> ServerSession::Create(std::string name) {
SharedPtr<ServerSession> server_session(new ServerSession);
ResultVal<SharedPtr<ServerSession>> ServerSession::Create(KernelCore& kernel, std::string name) {
SharedPtr<ServerSession> server_session(new ServerSession(kernel));
server_session->name = std::move(name);
server_session->parent = nullptr;
@@ -104,11 +105,10 @@ ResultCode ServerSession::HandleSyncRequest(SharedPtr<Thread> thread) {
// The ServerSession received a sync request, this means that there's new data available
// from its ClientSession, so wake up any threads that may be waiting on a svcReplyAndReceive or
// similar.
Kernel::HLERequestContext context(this);
u32* cmd_buf = (u32*)Memory::GetPointer(thread->GetTLSAddress());
context.PopulateFromIncomingCommandBuffer(cmd_buf, *Core::CurrentProcess(),
Kernel::g_handle_table);
kernel.HandleTable());
ResultCode result = RESULT_SUCCESS;
// If the session has been converted to a domain, handle the domain request
@@ -160,10 +160,11 @@ ResultCode ServerSession::HandleSyncRequest(SharedPtr<Thread> thread) {
return result;
}
ServerSession::SessionPair ServerSession::CreateSessionPair(const std::string& name,
ServerSession::SessionPair ServerSession::CreateSessionPair(KernelCore& kernel,
const std::string& name,
SharedPtr<ClientPort> port) {
auto server_session = ServerSession::Create(name + "_Server").Unwrap();
SharedPtr<ClientSession> client_session(new ClientSession);
auto server_session = ServerSession::Create(kernel, name + "_Server").Unwrap();
SharedPtr<ClientSession> client_session(new ClientSession(kernel));
client_session->name = name + "_Client";
std::shared_ptr<Session> parent(new Session);

View File

@@ -15,13 +15,14 @@
namespace Kernel {
class ClientSession;
class ClientPort;
class ClientSession;
class HLERequestContext;
class KernelCore;
class ServerSession;
class Session;
class SessionRequestHandler;
class Thread;
class HLERequestContext;
/**
* Kernel object representing the server endpoint of an IPC session. Sessions are the basic CTR-OS
@@ -50,11 +51,12 @@ public:
/**
* Creates a pair of ServerSession and an associated ClientSession.
* @param kernel The kernel instance to create the session pair under.
* @param name Optional name of the ports.
* @param client_port Optional The ClientPort that spawned this session.
* @return The created session tuple
*/
static SessionPair CreateSessionPair(const std::string& name = "Unknown",
static SessionPair CreateSessionPair(KernelCore& kernel, const std::string& name = "Unknown",
SharedPtr<ClientPort> client_port = nullptr);
/**
@@ -111,16 +113,18 @@ public:
}
private:
ServerSession();
explicit ServerSession(KernelCore& kernel);
~ServerSession() override;
/**
* Creates a server session. The server session can have an optional HLE handler,
* which will be invoked to handle the IPC requests that this session receives.
* @param kernel The kernel instance to create this server session under.
* @param name Optional name of the server session.
* @return The created server session
*/
static ResultVal<SharedPtr<ServerSession>> Create(std::string name = "Unknown");
static ResultVal<SharedPtr<ServerSession>> Create(KernelCore& kernel,
std::string name = "Unknown");
/// Handles a SyncRequest to a domain, forwarding the request to the proper object or closing an
/// object handle.
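And the matching session-pair sketch under the new signature, assuming SessionPair is the tuple alias used by CreateSessionPair and that client_port comes from the port-pair example above:

auto session_pair = Kernel::ServerSession::CreateSessionPair(kernel, "Example", client_port);
auto server_session = std::get<0>(session_pair);
auto client_session = std::get<1>(session_pair);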

View File

@@ -13,14 +13,14 @@
namespace Kernel {
SharedMemory::SharedMemory() {}
SharedMemory::~SharedMemory() {}
SharedMemory::SharedMemory(KernelCore& kernel) : Object{kernel} {}
SharedMemory::~SharedMemory() = default;
SharedPtr<SharedMemory> SharedMemory::Create(SharedPtr<Process> owner_process, u64 size,
MemoryPermission permissions,
SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, SharedPtr<Process> owner_process,
u64 size, MemoryPermission permissions,
MemoryPermission other_permissions, VAddr address,
MemoryRegion region, std::string name) {
SharedPtr<SharedMemory> shared_memory(new SharedMemory);
SharedPtr<SharedMemory> shared_memory(new SharedMemory(kernel));
shared_memory->owner_process = std::move(owner_process);
shared_memory->name = std::move(name);
@@ -59,12 +59,10 @@ SharedPtr<SharedMemory> SharedMemory::Create(SharedPtr<Process> owner_process, u
return shared_memory;
}
SharedPtr<SharedMemory> SharedMemory::CreateForApplet(std::shared_ptr<std::vector<u8>> heap_block,
u32 offset, u32 size,
MemoryPermission permissions,
MemoryPermission other_permissions,
std::string name) {
SharedPtr<SharedMemory> shared_memory(new SharedMemory);
SharedPtr<SharedMemory> SharedMemory::CreateForApplet(
KernelCore& kernel, std::shared_ptr<std::vector<u8>> heap_block, u32 offset, u32 size,
MemoryPermission permissions, MemoryPermission other_permissions, std::string name) {
SharedPtr<SharedMemory> shared_memory(new SharedMemory(kernel));
shared_memory->owner_process = nullptr;
shared_memory->name = std::move(name);

View File

@@ -15,6 +15,8 @@
namespace Kernel {
class KernelCore;
/// Permissions for mapped shared memory blocks
enum class MemoryPermission : u32 {
None = 0,
@@ -32,6 +34,7 @@ class SharedMemory final : public Object {
public:
/**
* Creates a shared memory object.
* @param kernel The kernel instance to create a shared memory instance under.
* @param owner_process Process that created this shared memory object.
* @param size Size of the memory block. Must be page-aligned.
* @param permissions Permission restrictions applied to the process which created the block.
@@ -42,14 +45,15 @@ public:
* linear heap.
* @param name Optional object name, used for debugging purposes.
*/
static SharedPtr<SharedMemory> Create(SharedPtr<Process> owner_process, u64 size,
MemoryPermission permissions,
static SharedPtr<SharedMemory> Create(KernelCore& kernel, SharedPtr<Process> owner_process,
u64 size, MemoryPermission permissions,
MemoryPermission other_permissions, VAddr address = 0,
MemoryRegion region = MemoryRegion::BASE,
std::string name = "Unknown");
/**
* Creates a shared memory object from a block of memory managed by an HLE applet.
* @param kernel The kernel instance to create a shared memory instance under.
* @param heap_block Heap block of the HLE applet.
* @param offset The offset into the heap block that the SharedMemory will map.
* @param size Size of the memory block. Must be page-aligned.
@@ -58,7 +62,8 @@ public:
* block.
* @param name Optional object name, used for debugging purposes.
*/
static SharedPtr<SharedMemory> CreateForApplet(std::shared_ptr<std::vector<u8>> heap_block,
static SharedPtr<SharedMemory> CreateForApplet(KernelCore& kernel,
std::shared_ptr<std::vector<u8>> heap_block,
u32 offset, u32 size,
MemoryPermission permissions,
MemoryPermission other_permissions,
@@ -125,7 +130,7 @@ public:
std::string name;
private:
SharedMemory();
explicit SharedMemory(KernelCore& kernel);
~SharedMemory() override;
};
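A minimal sketch of creating a shared memory block against the kernel instance; the size is illustrative and the MemoryPermission values are assumed members of the enum referenced above:

auto& kernel = Core::System::GetInstance().Kernel();
auto shared_mem = Kernel::SharedMemory::Create(
    kernel, Core::CurrentProcess(), /*size=*/0x1000,
    Kernel::MemoryPermission::ReadWrite, Kernel::MemoryPermission::Read);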

View File

@@ -12,16 +12,20 @@
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/string_util.h"
#include "core/arm/exclusive_monitor.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_timing.h"
#include "core/hle/kernel/address_arbiter.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/client_session.h"
#include "core/hle/kernel/event.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/mutex.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/shared_memory.h"
#include "core/hle/kernel/svc.h"
#include "core/hle/kernel/svc_wrap.h"
@@ -64,19 +68,22 @@ static ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
/// Connect to an OS service given the port name, returns the handle to the port to out
static ResultCode ConnectToNamedPort(Handle* out_handle, VAddr port_name_address) {
if (!Memory::IsValidVirtualAddress(port_name_address))
if (!Memory::IsValidVirtualAddress(port_name_address)) {
return ERR_NOT_FOUND;
}
static constexpr std::size_t PortNameMaxLength = 11;
// Read 1 char beyond the max allowed port name to detect names that are too long.
std::string port_name = Memory::ReadCString(port_name_address, PortNameMaxLength + 1);
if (port_name.size() > PortNameMaxLength)
if (port_name.size() > PortNameMaxLength) {
return ERR_PORT_NAME_TOO_LONG;
}
LOG_TRACE(Kernel_SVC, "called port_name={}", port_name);
auto it = Service::g_kernel_named_ports.find(port_name);
if (it == Service::g_kernel_named_ports.end()) {
auto& kernel = Core::System::GetInstance().Kernel();
auto it = kernel.FindNamedPort(port_name);
if (!kernel.IsValidNamedPort(it)) {
LOG_WARNING(Kernel_SVC, "tried to connect to unknown port: {}", port_name);
return ERR_NOT_FOUND;
}
@@ -87,13 +94,14 @@ static ResultCode ConnectToNamedPort(Handle* out_handle, VAddr port_name_address
CASCADE_RESULT(client_session, client_port->Connect());
// Return the client session
CASCADE_RESULT(*out_handle, g_handle_table.Create(client_session));
CASCADE_RESULT(*out_handle, kernel.HandleTable().Create(client_session));
return RESULT_SUCCESS;
}
/// Makes a blocking IPC call to an OS service.
static ResultCode SendSyncRequest(Handle handle) {
SharedPtr<ClientSession> session = g_handle_table.Get<ClientSession>(handle);
auto& kernel = Core::System::GetInstance().Kernel();
SharedPtr<ClientSession> session = kernel.HandleTable().Get<ClientSession>(handle);
if (!session) {
LOG_ERROR(Kernel_SVC, "called with invalid handle=0x{:08X}", handle);
return ERR_INVALID_HANDLE;
@@ -112,7 +120,8 @@ static ResultCode SendSyncRequest(Handle handle) {
static ResultCode GetThreadId(u32* thread_id, Handle thread_handle) {
LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
const SharedPtr<Thread> thread = g_handle_table.Get<Thread>(thread_handle);
auto& kernel = Core::System::GetInstance().Kernel();
const SharedPtr<Thread> thread = kernel.HandleTable().Get<Thread>(thread_handle);
if (!thread) {
return ERR_INVALID_HANDLE;
}
@@ -125,7 +134,8 @@ static ResultCode GetThreadId(u32* thread_id, Handle thread_handle) {
static ResultCode GetProcessId(u32* process_id, Handle process_handle) {
LOG_TRACE(Kernel_SVC, "called process=0x{:08X}", process_handle);
const SharedPtr<Process> process = g_handle_table.Get<Process>(process_handle);
auto& kernel = Core::System::GetInstance().Kernel();
const SharedPtr<Process> process = kernel.HandleTable().Get<Process>(process_handle);
if (!process) {
return ERR_INVALID_HANDLE;
}
@@ -168,10 +178,11 @@ static ResultCode WaitSynchronization(Handle* index, VAddr handles_address, u64
using ObjectPtr = SharedPtr<WaitObject>;
std::vector<ObjectPtr> objects(handle_count);
auto& kernel = Core::System::GetInstance().Kernel();
for (u64 i = 0; i < handle_count; ++i) {
const Handle handle = Memory::Read32(handles_address + i * sizeof(Handle));
const auto object = g_handle_table.Get<WaitObject>(handle);
const auto object = kernel.HandleTable().Get<WaitObject>(handle);
if (object == nullptr) {
return ERR_INVALID_HANDLE;
@@ -219,7 +230,8 @@ static ResultCode WaitSynchronization(Handle* index, VAddr handles_address, u64
static ResultCode CancelSynchronization(Handle thread_handle) {
LOG_TRACE(Kernel_SVC, "called thread=0x{:X}", thread_handle);
const SharedPtr<Thread> thread = g_handle_table.Get<Thread>(thread_handle);
auto& kernel = Core::System::GetInstance().Kernel();
const SharedPtr<Thread> thread = kernel.HandleTable().Get<Thread>(thread_handle);
if (!thread) {
return ERR_INVALID_HANDLE;
}
@@ -239,7 +251,9 @@ static ResultCode ArbitrateLock(Handle holding_thread_handle, VAddr mutex_addr,
"requesting_current_thread_handle=0x{:08X}",
holding_thread_handle, mutex_addr, requesting_thread_handle);
return Mutex::TryAcquire(mutex_addr, holding_thread_handle, requesting_thread_handle);
auto& handle_table = Core::System::GetInstance().Kernel().HandleTable();
return Mutex::TryAcquire(handle_table, mutex_addr, holding_thread_handle,
requesting_thread_handle);
}
/// Unlock a mutex
@@ -352,7 +366,8 @@ static ResultCode GetThreadContext(Handle handle, VAddr addr) {
/// Gets the priority for the specified thread
static ResultCode GetThreadPriority(u32* priority, Handle handle) {
const SharedPtr<Thread> thread = g_handle_table.Get<Thread>(handle);
auto& kernel = Core::System::GetInstance().Kernel();
const SharedPtr<Thread> thread = kernel.HandleTable().Get<Thread>(handle);
if (!thread)
return ERR_INVALID_HANDLE;
@@ -366,7 +381,8 @@ static ResultCode SetThreadPriority(Handle handle, u32 priority) {
return ERR_OUT_OF_RANGE;
}
SharedPtr<Thread> thread = g_handle_table.Get<Thread>(handle);
auto& kernel = Core::System::GetInstance().Kernel();
SharedPtr<Thread> thread = kernel.HandleTable().Get<Thread>(handle);
if (!thread)
return ERR_INVALID_HANDLE;
@@ -395,7 +411,8 @@ static ResultCode MapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 s
"called, shared_memory_handle=0x{:X}, addr=0x{:X}, size=0x{:X}, permissions=0x{:08X}",
shared_memory_handle, addr, size, permissions);
SharedPtr<SharedMemory> shared_memory = g_handle_table.Get<SharedMemory>(shared_memory_handle);
auto& kernel = Core::System::GetInstance().Kernel();
auto shared_memory = kernel.HandleTable().Get<SharedMemory>(shared_memory_handle);
if (!shared_memory) {
return ERR_INVALID_HANDLE;
}
@@ -423,7 +440,8 @@ static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64
LOG_WARNING(Kernel_SVC, "called, shared_memory_handle=0x{:08X}, addr=0x{:X}, size=0x{:X}",
shared_memory_handle, addr, size);
SharedPtr<SharedMemory> shared_memory = g_handle_table.Get<SharedMemory>(shared_memory_handle);
auto& kernel = Core::System::GetInstance().Kernel();
auto shared_memory = kernel.HandleTable().Get<SharedMemory>(shared_memory_handle);
return shared_memory->Unmap(Core::CurrentProcess().get(), addr);
}
@@ -431,7 +449,9 @@ static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64
/// Query process memory
static ResultCode QueryProcessMemory(MemoryInfo* memory_info, PageInfo* /*page_info*/,
Handle process_handle, u64 addr) {
SharedPtr<Process> process = g_handle_table.Get<Process>(process_handle);
auto& kernel = Core::System::GetInstance().Kernel();
SharedPtr<Process> process = kernel.HandleTable().Get<Process>(process_handle);
if (!process) {
return ERR_INVALID_HANDLE;
}
@@ -528,10 +548,11 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V
break;
}
auto& kernel = Core::System::GetInstance().Kernel();
CASCADE_RESULT(SharedPtr<Thread> thread,
Thread::Create(name, entry_point, priority, arg, processor_id, stack_top,
Thread::Create(kernel, name, entry_point, priority, arg, processor_id, stack_top,
Core::CurrentProcess()));
CASCADE_RESULT(thread->guest_handle, g_handle_table.Create(thread));
CASCADE_RESULT(thread->guest_handle, kernel.HandleTable().Create(thread));
*out_handle = thread->guest_handle;
Core::System::GetInstance().CpuCore(thread->processor_id).PrepareReschedule();
@@ -548,7 +569,8 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V
static ResultCode StartThread(Handle thread_handle) {
LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
const SharedPtr<Thread> thread = g_handle_table.Get<Thread>(thread_handle);
auto& kernel = Core::System::GetInstance().Kernel();
const SharedPtr<Thread> thread = kernel.HandleTable().Get<Thread>(thread_handle);
if (!thread) {
return ERR_INVALID_HANDLE;
}
@@ -595,7 +617,8 @@ static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_var
"called mutex_addr={:X}, condition_variable_addr={:X}, thread_handle=0x{:08X}, timeout={}",
mutex_addr, condition_variable_addr, thread_handle, nano_seconds);
SharedPtr<Thread> thread = g_handle_table.Get<Thread>(thread_handle);
auto& kernel = Core::System::GetInstance().Kernel();
SharedPtr<Thread> thread = kernel.HandleTable().Get<Thread>(thread_handle);
ASSERT(thread);
CASCADE_CODE(Mutex::Release(mutex_addr));
@@ -704,8 +727,9 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
mutex_val | Mutex::MutexHasWaitersFlag));
// The mutex is already owned by some other thread, make this thread wait on it.
auto& kernel = Core::System::GetInstance().Kernel();
Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask);
auto owner = g_handle_table.Get<Thread>(owner_handle);
auto owner = kernel.HandleTable().Get<Thread>(owner_handle);
ASSERT(owner);
ASSERT(thread->status == ThreadStatus::WaitMutex);
thread->wakeup_callback = nullptr;
@@ -783,14 +807,20 @@ static u64 GetSystemTick() {
/// Close a handle
static ResultCode CloseHandle(Handle handle) {
LOG_TRACE(Kernel_SVC, "Closing handle 0x{:08X}", handle);
return g_handle_table.Close(handle);
auto& kernel = Core::System::GetInstance().Kernel();
return kernel.HandleTable().Close(handle);
}
/// Reset an event
static ResultCode ResetSignal(Handle handle) {
LOG_WARNING(Kernel_SVC, "(STUBBED) called handle 0x{:08X}", handle);
auto event = g_handle_table.Get<Event>(handle);
auto& kernel = Core::System::GetInstance().Kernel();
auto event = kernel.HandleTable().Get<Event>(handle);
ASSERT(event != nullptr);
event->Clear();
return RESULT_SUCCESS;
}
@@ -806,7 +836,8 @@ static ResultCode CreateTransferMemory(Handle* handle, VAddr addr, u64 size, u32
static ResultCode GetThreadCoreMask(Handle thread_handle, u32* core, u64* mask) {
LOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}", thread_handle);
const SharedPtr<Thread> thread = g_handle_table.Get<Thread>(thread_handle);
auto& kernel = Core::System::GetInstance().Kernel();
const SharedPtr<Thread> thread = kernel.HandleTable().Get<Thread>(thread_handle);
if (!thread) {
return ERR_INVALID_HANDLE;
}
@@ -821,7 +852,8 @@ static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) {
LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, mask=0x{:16X}, core=0x{:X}", thread_handle,
mask, core);
const SharedPtr<Thread> thread = g_handle_table.Get<Thread>(thread_handle);
auto& kernel = Core::System::GetInstance().Kernel();
const SharedPtr<Thread> thread = kernel.HandleTable().Get<Thread>(thread_handle);
if (!thread) {
return ERR_INVALID_HANDLE;
}
@@ -861,19 +893,23 @@ static ResultCode CreateSharedMemory(Handle* handle, u64 size, u32 local_permiss
u32 remote_permissions) {
LOG_TRACE(Kernel_SVC, "called, size=0x{:X}, localPerms=0x{:08X}, remotePerms=0x{:08X}", size,
local_permissions, remote_permissions);
auto sharedMemHandle =
SharedMemory::Create(g_handle_table.Get<Process>(KernelHandle::CurrentProcess), size,
auto& kernel = Core::System::GetInstance().Kernel();
auto& handle_table = kernel.HandleTable();
auto shared_mem_handle =
SharedMemory::Create(kernel, handle_table.Get<Process>(KernelHandle::CurrentProcess), size,
static_cast<MemoryPermission>(local_permissions),
static_cast<MemoryPermission>(remote_permissions));
CASCADE_RESULT(*handle, g_handle_table.Create(sharedMemHandle));
CASCADE_RESULT(*handle, handle_table.Create(shared_mem_handle));
return RESULT_SUCCESS;
}
static ResultCode ClearEvent(Handle handle) {
LOG_TRACE(Kernel_SVC, "called, event=0x{:08X}", handle);
SharedPtr<Event> evt = g_handle_table.Get<Event>(handle);
auto& kernel = Core::System::GetInstance().Kernel();
SharedPtr<Event> evt = kernel.HandleTable().Get<Event>(handle);
if (evt == nullptr)
return ERR_INVALID_HANDLE;
evt->Clear();

View File

@@ -16,22 +16,21 @@
#include "common/thread_queue_list.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/lock.h"
#include "core/hle/result.h"
#include "core/memory.h"
namespace Kernel {
/// Event type for the thread wake up event
static CoreTiming::EventType* ThreadWakeupEventType = nullptr;
bool Thread::ShouldWait(Thread* thread) const {
return status != ThreadStatus::Dead;
}
@@ -40,32 +39,17 @@ void Thread::Acquire(Thread* thread) {
ASSERT_MSG(!ShouldWait(thread), "object unavailable!");
}
// TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future, allowing
// us to simply use a pool index or similar.
static Kernel::HandleTable wakeup_callback_handle_table;
// The first available thread id at startup
static u32 next_thread_id;
/**
* Creates a new thread ID
* @return The new thread ID
*/
inline static u32 const NewThreadId() {
return next_thread_id++;
}
Thread::Thread() {}
Thread::~Thread() {}
Thread::Thread(KernelCore& kernel) : WaitObject{kernel} {}
Thread::~Thread() = default;
void Thread::Stop() {
// Cancel any outstanding wakeup events for this thread
CoreTiming::UnscheduleEvent(ThreadWakeupEventType, callback_handle);
wakeup_callback_handle_table.Close(callback_handle);
CoreTiming::UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), callback_handle);
kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle);
callback_handle = 0;
// Clean up thread from ready queue
// This is only needed when the thread is termintated forcefully (SVC TerminateProcess)
// This is only needed when the thread is terminated forcefully (SVC TerminateProcess)
if (status == ThreadStatus::Ready) {
scheduler->UnscheduleThread(this, current_priority);
}
@@ -98,63 +82,6 @@ void ExitCurrentThread() {
Core::System::GetInstance().CurrentScheduler().RemoveThread(thread);
}
/**
* Callback that will wake up the thread it was scheduled for
* @param thread_handle The handle of the thread that's been awoken
* @param cycles_late The number of CPU cycles that have passed since the desired wakeup time
*/
static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) {
const auto proper_handle = static_cast<Handle>(thread_handle);
// Lock the global kernel mutex when we enter the kernel HLE.
std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
SharedPtr<Thread> thread = wakeup_callback_handle_table.Get<Thread>(proper_handle);
if (thread == nullptr) {
LOG_CRITICAL(Kernel, "Callback fired for invalid thread {:08X}", proper_handle);
return;
}
bool resume = true;
if (thread->status == ThreadStatus::WaitSynchAny ||
thread->status == ThreadStatus::WaitSynchAll ||
thread->status == ThreadStatus::WaitHLEEvent) {
// Remove the thread from each of its waiting objects' waitlists
for (auto& object : thread->wait_objects)
object->RemoveWaitingThread(thread.get());
thread->wait_objects.clear();
// Invoke the wakeup callback before clearing the wait objects
if (thread->wakeup_callback)
resume = thread->wakeup_callback(ThreadWakeupReason::Timeout, thread, nullptr, 0);
}
if (thread->mutex_wait_address != 0 || thread->condvar_wait_address != 0 ||
thread->wait_handle) {
ASSERT(thread->status == ThreadStatus::WaitMutex);
thread->mutex_wait_address = 0;
thread->condvar_wait_address = 0;
thread->wait_handle = 0;
auto lock_owner = thread->lock_owner;
// Threads waking up by timeout from WaitProcessWideKey do not perform priority inheritance
// and don't have a lock owner unless SignalProcessWideKey was called first and the thread
// wasn't awakened due to the mutex already being acquired.
if (lock_owner) {
lock_owner->RemoveMutexWaiter(thread);
}
}
if (thread->arb_wait_address != 0) {
ASSERT(thread->status == ThreadStatus::WaitArb);
thread->arb_wait_address = 0;
}
if (resume)
thread->ResumeFromWait();
}
void Thread::WakeAfterDelay(s64 nanoseconds) {
// Don't schedule a wakeup if the thread wants to wait forever
if (nanoseconds == -1)
@@ -162,12 +89,12 @@ void Thread::WakeAfterDelay(s64 nanoseconds) {
// This function might be called from any thread so we have to be cautious and use the
// thread-safe version of ScheduleEvent.
CoreTiming::ScheduleEventThreadsafe(CoreTiming::nsToCycles(nanoseconds), ThreadWakeupEventType,
callback_handle);
CoreTiming::ScheduleEventThreadsafe(CoreTiming::nsToCycles(nanoseconds),
kernel.ThreadWakeupCallbackEventType(), callback_handle);
}
void Thread::CancelWakeupTimer() {
CoreTiming::UnscheduleEventThreadsafe(ThreadWakeupEventType, callback_handle);
CoreTiming::UnscheduleEventThreadsafe(kernel.ThreadWakeupCallbackEventType(), callback_handle);
}
static boost::optional<s32> GetNextProcessorId(u64 mask) {
@@ -294,9 +221,9 @@ static void ResetThreadContext(Core::ARM_Interface::ThreadContext& context, VAdd
context.fpscr = 0;
}
ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, u32 priority,
u64 arg, s32 processor_id, VAddr stack_top,
SharedPtr<Process> owner_process) {
ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name, VAddr entry_point,
u32 priority, u64 arg, s32 processor_id,
VAddr stack_top, SharedPtr<Process> owner_process) {
// Check if priority is in ranged. Lowest priority -> highest priority id.
if (priority > THREADPRIO_LOWEST) {
LOG_ERROR(Kernel_SVC, "Invalid thread priority: {}", priority);
@@ -316,9 +243,9 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
return ResultCode(-1);
}
SharedPtr<Thread> thread(new Thread);
SharedPtr<Thread> thread(new Thread(kernel));
thread->thread_id = NewThreadId();
thread->thread_id = kernel.CreateNewThreadID();
thread->status = ThreadStatus::Dormant;
thread->entry_point = entry_point;
thread->stack_top = stack_top;
@@ -333,7 +260,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
thread->condvar_wait_address = 0;
thread->wait_handle = 0;
thread->name = std::move(name);
thread->callback_handle = wakeup_callback_handle_table.Create(thread).Unwrap();
thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap();
thread->owner_process = owner_process;
thread->scheduler = Core::System::GetInstance().Scheduler(processor_id);
thread->scheduler->AddThread(thread, priority);
@@ -383,19 +310,19 @@ void Thread::BoostPriority(u32 priority) {
current_priority = priority;
}
SharedPtr<Thread> SetupMainThread(VAddr entry_point, u32 priority,
SharedPtr<Thread> SetupMainThread(KernelCore& kernel, VAddr entry_point, u32 priority,
SharedPtr<Process> owner_process) {
// Setup page table so we can write to memory
SetCurrentPageTable(&Core::CurrentProcess()->vm_manager.page_table);
// Initialize new "main" thread
auto thread_res = Thread::Create("main", entry_point, priority, 0, THREADPROCESSORID_0,
auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0, THREADPROCESSORID_0,
Memory::STACK_AREA_VADDR_END, std::move(owner_process));
SharedPtr<Thread> thread = std::move(thread_res).Unwrap();
// Register 1 must be a handle to the main thread
thread->guest_handle = Kernel::g_handle_table.Create(thread).Unwrap();
thread->guest_handle = kernel.HandleTable().Create(thread).Unwrap();
thread->context.cpu_registers[1] = thread->guest_handle;
@@ -528,13 +455,4 @@ Thread* GetCurrentThread() {
return Core::System::GetInstance().CurrentScheduler().GetCurrentThread();
}
void ThreadingInit() {
ThreadWakeupEventType = CoreTiming::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
next_thread_id = 1;
}
void ThreadingShutdown() {
Kernel::ClearProcessList();
}
} // namespace Kernel

View File

@@ -56,6 +56,7 @@ enum class ThreadWakeupReason {
namespace Kernel {
class KernelCore;
class Process;
class Scheduler;
@@ -63,6 +64,7 @@ class Thread final : public WaitObject {
public:
/**
* Creates and returns a new thread. The new thread is immediately scheduled
* @param kernel The kernel instance this thread will be created under.
* @param name The friendly name desired for the thread
* @param entry_point The address at which the thread should start execution
* @param priority The thread's priority
@@ -72,8 +74,9 @@ public:
* @param owner_process The parent process for the thread
* @return A shared pointer to the newly created thread
*/
static ResultVal<SharedPtr<Thread>> Create(std::string name, VAddr entry_point, u32 priority,
u64 arg, s32 processor_id, VAddr stack_top,
static ResultVal<SharedPtr<Thread>> Create(KernelCore& kernel, std::string name,
VAddr entry_point, u32 priority, u64 arg,
s32 processor_id, VAddr stack_top,
SharedPtr<Process> owner_process);
std::string GetName() const override {
@@ -263,7 +266,7 @@ public:
u64 affinity_mask{0x1};
private:
Thread();
explicit Thread(KernelCore& kernel);
~Thread() override;
std::shared_ptr<std::vector<u8>> tls_memory = std::make_shared<std::vector<u8>>();
@@ -271,12 +274,13 @@ private:
/**
* Sets up the primary application thread
* @param kernel The kernel instance to create the main thread under.
* @param entry_point The address at which the thread should start execution
* @param priority The priority to give the main thread
* @param owner_process The parent process for the main thread
* @return A shared pointer to the main thread
*/
SharedPtr<Thread> SetupMainThread(VAddr entry_point, u32 priority,
SharedPtr<Thread> SetupMainThread(KernelCore& kernel, VAddr entry_point, u32 priority,
SharedPtr<Process> owner_process);
/**
@@ -294,14 +298,4 @@ void WaitCurrentThread_Sleep();
*/
void ExitCurrentThread();
/**
* Initialize threading
*/
void ThreadingInit();
/**
* Shutdown threading
*/
void ThreadingShutdown();
} // namespace Kernel
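A hedged sketch of the new Thread::Create signature in use, mirroring the CreateThread SVC handler earlier in this diff (all values are illustrative):

auto& kernel = Core::System::GetInstance().Kernel();
CASCADE_RESULT(Kernel::SharedPtr<Kernel::Thread> thread,
               Kernel::Thread::Create(kernel, "worker", entry_point, priority, /*arg=*/0,
                                      processor_id, stack_top, Core::CurrentProcess()));
thread->guest_handle = kernel.HandleTable().Create(thread).Unwrap();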

View File

@@ -2,36 +2,31 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cinttypes>
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/kernel/timer.h"
namespace Kernel {
/// The event type of the generic timer callback event
static CoreTiming::EventType* timer_callback_event_type = nullptr;
// TODO(yuriks): This can be removed if Timer objects are explicitly pooled in the future, allowing
// us to simply use a pool index or similar.
static Kernel::HandleTable timer_callback_handle_table;
Timer::Timer(KernelCore& kernel) : WaitObject{kernel} {}
Timer::~Timer() = default;
Timer::Timer() {}
Timer::~Timer() {}
SharedPtr<Timer> Timer::Create(ResetType reset_type, std::string name) {
SharedPtr<Timer> timer(new Timer);
SharedPtr<Timer> Timer::Create(KernelCore& kernel, ResetType reset_type, std::string name) {
SharedPtr<Timer> timer(new Timer(kernel));
timer->reset_type = reset_type;
timer->signaled = false;
timer->name = std::move(name);
timer->initial_delay = 0;
timer->interval_delay = 0;
timer->callback_handle = timer_callback_handle_table.Create(timer).Unwrap();
timer->callback_handle = kernel.CreateTimerCallbackHandle(timer).Unwrap();
return timer;
}
@@ -58,13 +53,13 @@ void Timer::Set(s64 initial, s64 interval) {
// Immediately invoke the callback
Signal(0);
} else {
CoreTiming::ScheduleEvent(CoreTiming::nsToCycles(initial), timer_callback_event_type,
CoreTiming::ScheduleEvent(CoreTiming::nsToCycles(initial), kernel.TimerCallbackEventType(),
callback_handle);
}
}
void Timer::Cancel() {
CoreTiming::UnscheduleEvent(timer_callback_event_type, callback_handle);
CoreTiming::UnscheduleEvent(kernel.TimerCallbackEventType(), callback_handle);
}
void Timer::Clear() {
@@ -89,28 +84,8 @@ void Timer::Signal(int cycles_late) {
if (interval_delay != 0) {
// Reschedule the timer with the interval delay
CoreTiming::ScheduleEvent(CoreTiming::nsToCycles(interval_delay) - cycles_late,
timer_callback_event_type, callback_handle);
kernel.TimerCallbackEventType(), callback_handle);
}
}
/// The timer callback event, called when a timer is fired
static void TimerCallback(u64 timer_handle, int cycles_late) {
SharedPtr<Timer> timer =
timer_callback_handle_table.Get<Timer>(static_cast<Handle>(timer_handle));
if (timer == nullptr) {
LOG_CRITICAL(Kernel, "Callback fired for invalid timer {:016X}", timer_handle);
return;
}
timer->Signal(cycles_late);
}
void TimersInit() {
timer_callback_handle_table.Clear();
timer_callback_event_type = CoreTiming::RegisterEvent("TimerCallback", TimerCallback);
}
void TimersShutdown() {}
} // namespace Kernel

View File

@@ -10,15 +10,19 @@
namespace Kernel {
class KernelCore;
class Timer final : public WaitObject {
public:
/**
* Creates a timer
* @param kernel The kernel instance to create the timer callback handle for.
* @param reset_type ResetType describing how to create the timer
* @param name Optional name of timer
* @return The created Timer
*/
static SharedPtr<Timer> Create(ResetType reset_type, std::string name = "Unknown");
static SharedPtr<Timer> Create(KernelCore& kernel, ResetType reset_type,
std::string name = "Unknown");
std::string GetTypeName() const override {
return "Timer";
@@ -68,7 +72,7 @@ public:
void Signal(int cycles_late);
private:
Timer();
explicit Timer(KernelCore& kernel);
~Timer() override;
ResetType reset_type; ///< The ResetType of this timer
@@ -83,9 +87,4 @@ private:
Handle callback_handle;
};
/// Initializes the required variables for timers
void TimersInit();
/// Tears down the timer variables
void TimersShutdown();
} // namespace Kernel
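Finally, a brief sketch of timer creation with the kernel-owned callback handle table (name and delays are illustrative):

auto& kernel = Core::System::GetInstance().Kernel();
auto timer = Kernel::Timer::Create(kernel, Kernel::ResetType::OneShot, "ExampleTimer");
timer->Set(/*initial=*/1000000, /*interval=*/0); // nanoseconds; a zero interval means no reschedule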

View File

@@ -12,6 +12,9 @@
namespace Kernel {
WaitObject::WaitObject(KernelCore& kernel) : Object{kernel} {}
WaitObject::~WaitObject() = default;
void WaitObject::AddWaitingThread(SharedPtr<Thread> thread) {
auto itr = std::find(waiting_threads.begin(), waiting_threads.end(), thread);
if (itr == waiting_threads.end())

View File

@@ -11,11 +11,15 @@
namespace Kernel {
class KernelCore;
class Thread;
/// Class that represents a Kernel object that a thread can be waiting on
class WaitObject : public Object {
public:
explicit WaitObject(KernelCore& kernel);
~WaitObject() override;
/**
* Check if the specified thread should wait until the object is available
* @param thread The thread about which we're deciding.

View File

@@ -160,8 +160,9 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger
};
RegisterHandlers(functions);
auto& kernel = Core::System::GetInstance().Kernel();
launchable_event =
Kernel::Event::Create(Kernel::ResetType::Sticky, "ISelfController:LaunchableEvent");
Kernel::Event::Create(kernel, Kernel::ResetType::Sticky, "ISelfController:LaunchableEvent");
}
void ISelfController::SetFocusHandlingMode(Kernel::HLERequestContext& ctx) {
@@ -332,7 +333,8 @@ ICommonStateGetter::ICommonStateGetter() : ServiceFramework("ICommonStateGetter"
};
RegisterHandlers(functions);
event = Kernel::Event::Create(Kernel::ResetType::OneShot, "ICommonStateGetter:Event");
auto& kernel = Core::System::GetInstance().Kernel();
event = Kernel::Event::Create(kernel, Kernel::ResetType::OneShot, "ICommonStateGetter:Event");
}
void ICommonStateGetter::GetBootMode(Kernel::HLERequestContext& ctx) {
@@ -505,7 +507,8 @@ public:
};
RegisterHandlers(functions);
state_changed_event = Kernel::Event::Create(Kernel::ResetType::OneShot,
auto& kernel = Core::System::GetInstance().Kernel();
state_changed_event = Kernel::Event::Create(kernel, Kernel::ResetType::OneShot,
"ILibraryAppletAccessor:StateChangedEvent");
}

View File

@@ -47,7 +47,9 @@ public:
RegisterHandlers(functions);
// This is the event handle used to check if the audio buffer was released
buffer_event = Kernel::Event::Create(Kernel::ResetType::Sticky, "IAudioOutBufferReleased");
auto& kernel = Core::System::GetInstance().Kernel();
buffer_event =
Kernel::Event::Create(kernel, Kernel::ResetType::Sticky, "IAudioOutBufferReleased");
stream = audio_core.OpenStream(audio_params.sample_rate, audio_params.channel_count,
"IAudioOut", [=]() { buffer_event->Signal(); });

View File

@@ -35,8 +35,9 @@ public:
};
RegisterHandlers(functions);
auto& kernel = Core::System::GetInstance().Kernel();
system_event =
Kernel::Event::Create(Kernel::ResetType::Sticky, "IAudioRenderer:SystemEvent");
Kernel::Event::Create(kernel, Kernel::ResetType::Sticky, "IAudioRenderer:SystemEvent");
renderer = std::make_unique<AudioCore::AudioRenderer>(audren_params, system_event);
}
@@ -121,8 +122,9 @@ public:
};
RegisterHandlers(functions);
buffer_event =
Kernel::Event::Create(Kernel::ResetType::OneShot, "IAudioOutBufferReleasedEvent");
auto& kernel = Core::System::GetInstance().Kernel();
buffer_event = Kernel::Event::Create(kernel, Kernel::ResetType::OneShot,
"IAudioOutBufferReleasedEvent");
}
private:

View File

@@ -60,17 +60,20 @@ ResultCode VfsDirectoryServiceWrapper::CreateFile(const std::string& path_, u64
ResultCode VfsDirectoryServiceWrapper::DeleteFile(const std::string& path_) const {
std::string path(FileUtil::SanitizePath(path_));
auto dir = GetDirectoryRelativeWrapped(backing, FileUtil::GetParentPath(path));
if (path.empty()) {
// TODO(DarkLordZach): Why do games call this and what should it do? Works as is but...
return RESULT_SUCCESS;
}
if (dir->GetFile(FileUtil::GetFilename(path)) == nullptr)
auto dir = GetDirectoryRelativeWrapped(backing, FileUtil::GetParentPath(path));
if (dir->GetFile(FileUtil::GetFilename(path)) == nullptr) {
return FileSys::ERROR_PATH_NOT_FOUND;
}
if (!dir->DeleteFile(FileUtil::GetFilename(path))) {
// TODO(DarkLordZach): Find a better error code for this
return ResultCode(-1);
}
return RESULT_SUCCESS;
}
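The DeleteFile change is purely an ordering fix: the empty-path early return now runs before the parent-directory lookup, so the lookup is only paid for when its result can be used. The same guard-then-work ordering in isolation (SanitizePath and FindParent are trimmed-down stand-ins):

#include <optional>
#include <string>

struct Directory {};

// Assumed helper: here it just strips trailing slashes.
std::string SanitizePath(std::string path) {
    while (!path.empty() && path.back() == '/')
        path.pop_back();
    return path;
}

// Assumed helper: pretend the lookup always succeeds.
std::optional<Directory> FindParent(const std::string& path) {
    return Directory{};
}

bool DeleteFileIn(const std::string& raw_path) {
    const std::string path = SanitizePath(raw_path);
    if (path.empty()) {
        // Cheap guard first: callers passing an empty path get a success
        // result without paying for a directory lookup at all.
        return true;
    }
    // Only now do the lookup whose result is actually needed.
    const auto parent = FindParent(path);
    if (!parent) {
        return false;
    }
    // ... delete the filename component inside *parent ...
    return true;
}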

View File

@@ -7,6 +7,7 @@
#include <memory>
#include "common/common_types.h"
#include "core/file_sys/directory.h"
#include "core/file_sys/vfs.h"
#include "core/hle/result.h"
namespace FileSys {

View File

@@ -26,6 +26,17 @@
namespace Service::FileSystem {
enum class FileSystemType : u8 {
Invalid0 = 0,
Invalid1 = 1,
Logo = 2,
ContentControl = 3,
ContentManual = 4,
ContentMeta = 5,
ContentData = 6,
ApplicationPackage = 7,
};
class IStorage final : public ServiceFramework<IStorage> {
public:
explicit IStorage(FileSys::VirtualFile backend_)
@@ -420,7 +431,7 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
{0, nullptr, "MountContent"},
{1, &FSP_SRV::Initialize, "Initialize"},
{2, nullptr, "OpenDataFileSystemByCurrentProcess"},
{7, nullptr, "OpenFileSystemWithPatch"},
{7, &FSP_SRV::OpenFileSystemWithPatch, "OpenFileSystemWithPatch"},
{8, nullptr, "OpenFileSystemWithId"},
{9, nullptr, "OpenDataFileSystemByApplicationId"},
{11, nullptr, "OpenBisFileSystem"},
@@ -444,7 +455,7 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
{34, nullptr, "GetCacheStorageSize"},
{51, &FSP_SRV::MountSaveData, "MountSaveData"},
{52, nullptr, "OpenSaveDataFileSystemBySystemSaveDataId"},
{53, nullptr, "OpenReadOnlySaveDataFileSystem"},
{53, &FSP_SRV::OpenReadOnlySaveDataFileSystem, "OpenReadOnlySaveDataFileSystem"},
{57, nullptr, "ReadSaveDataFileSystemExtraDataBySaveDataSpaceId"},
{58, nullptr, "ReadSaveDataFileSystemExtraData"},
{59, nullptr, "WriteSaveDataFileSystemExtraData"},
@@ -516,6 +527,16 @@ void FSP_SRV::Initialize(Kernel::HLERequestContext& ctx) {
rb.Push(RESULT_SUCCESS);
}
void FSP_SRV::OpenFileSystemWithPatch(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto type = rp.PopRaw<FileSystemType>();
const auto title_id = rp.PopRaw<u64>();
IPC::ResponseBuilder rb{ctx, 2, 0, 0};
rb.Push(ResultCode(-1));
}
void FSP_SRV::MountSdCard(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_FS, "called");
@@ -563,6 +584,11 @@ void FSP_SRV::MountSaveData(Kernel::HLERequestContext& ctx) {
rb.PushIpcInterface<IFileSystem>(std::move(filesystem));
}
void FSP_SRV::OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_FS, "(STUBBED) called, delegating to 51 OpenSaveDataFilesystem");
MountSaveData(ctx);
}
void FSP_SRV::GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_FS, "(STUBBED) called");

View File

@@ -20,9 +20,11 @@ public:
private:
void Initialize(Kernel::HLERequestContext& ctx);
void OpenFileSystemWithPatch(Kernel::HLERequestContext& ctx);
void MountSdCard(Kernel::HLERequestContext& ctx);
void CreateSaveData(Kernel::HLERequestContext& ctx);
void MountSaveData(Kernel::HLERequestContext& ctx);
void OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx);
void GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx);
void OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
void OpenDataStorageByDataId(Kernel::HLERequestContext& ctx);

View File

@@ -4,6 +4,7 @@
#include <atomic>
#include "common/logging/log.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/frontend/emu_window.h"
@@ -35,9 +36,10 @@ public:
};
RegisterHandlers(functions);
auto& kernel = Core::System::GetInstance().Kernel();
shared_mem = Kernel::SharedMemory::Create(
nullptr, 0x40000, Kernel::MemoryPermission::ReadWrite, Kernel::MemoryPermission::Read,
0, Kernel::MemoryRegion::BASE, "HID:SharedMemory");
kernel, nullptr, 0x40000, Kernel::MemoryPermission::ReadWrite,
Kernel::MemoryPermission::Read, 0, Kernel::MemoryRegion::BASE, "HID:SharedMemory");
// Register update callbacks
pad_update_event = CoreTiming::RegisterEvent(
@@ -402,7 +404,8 @@ public:
RegisterHandlers(functions);
event = Kernel::Event::Create(Kernel::ResetType::OneShot, "hid:EventHandle");
auto& kernel = Core::System::GetInstance().Kernel();
event = Kernel::Event::Create(kernel, Kernel::ResetType::OneShot, "hid:EventHandle");
}
~Hid() = default;

View File

@@ -46,11 +46,13 @@ public:
};
RegisterHandlers(functions);
activate_event = Kernel::Event::Create(Kernel::ResetType::OneShot, "IUser:ActivateEvent");
auto& kernel = Core::System::GetInstance().Kernel();
activate_event =
Kernel::Event::Create(kernel, Kernel::ResetType::OneShot, "IUser:ActivateEvent");
deactivate_event =
Kernel::Event::Create(Kernel::ResetType::OneShot, "IUser:DeactivateEvent");
availability_change_event =
Kernel::Event::Create(Kernel::ResetType::OneShot, "IUser:AvailabilityChangeEvent");
Kernel::Event::Create(kernel, Kernel::ResetType::OneShot, "IUser:DeactivateEvent");
availability_change_event = Kernel::Event::Create(kernel, Kernel::ResetType::OneShot,
"IUser:AvailabilityChangeEvent");
}
private:

View File

@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/event.h"
#include "core/hle/service/nifm/nifm.h"
@@ -54,8 +55,9 @@ public:
};
RegisterHandlers(functions);
event1 = Kernel::Event::Create(Kernel::ResetType::OneShot, "IRequest:Event1");
event2 = Kernel::Event::Create(Kernel::ResetType::OneShot, "IRequest:Event2");
auto& kernel = Core::System::GetInstance().Kernel();
event1 = Kernel::Event::Create(kernel, Kernel::ResetType::OneShot, "IRequest:Event1");
event2 = Kernel::Event::Create(kernel, Kernel::ResetType::OneShot, "IRequest:Event2");
}
private:

View File

@@ -266,8 +266,9 @@ void PL_U::GetSharedMemoryNativeHandle(Kernel::HLERequestContext& ctx) {
SHARED_FONT_MEM_VADDR, shared_font, 0, SHARED_FONT_MEM_SIZE, Kernel::MemoryState::Shared);
// Create shared font memory object
auto& kernel = Core::System::GetInstance().Kernel();
shared_font_mem = Kernel::SharedMemory::Create(
Core::CurrentProcess(), SHARED_FONT_MEM_SIZE, Kernel::MemoryPermission::ReadWrite,
kernel, Core::CurrentProcess(), SHARED_FONT_MEM_SIZE, Kernel::MemoryPermission::ReadWrite,
Kernel::MemoryPermission::Read, SHARED_FONT_MEM_VADDR, Kernel::MemoryRegion::BASE,
"PL_U:shared_font_mem");

View File

@@ -7,6 +7,7 @@
#include "core/core.h"
#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
#include "core/hle/service/nvdrv/devices/nvmap.h"
#include "core/perf_stats.h"
#include "video_core/gpu.h"
#include "video_core/renderer_base.h"
@@ -31,7 +32,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
transform, crop_rect};
auto& instance = Core::System::GetInstance();
instance.perf_stats.EndGameFrame();
instance.GetPerfStats().EndGameFrame();
instance.Renderer().SwapBuffers(framebuffer);
}

View File

@@ -10,6 +10,7 @@
#include "common/common_types.h"
#include "common/swap.h"
#include "core/hle/service/nvdrv/devices/nvdevice.h"
#include "video_core/memory_manager.h"
namespace Service::Nvidia::Devices {

View File

@@ -4,6 +4,7 @@
#include <cinttypes>
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/event.h"
#include "core/hle/service/nvdrv/interface.h"
@@ -107,7 +108,8 @@ NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name)
};
RegisterHandlers(functions);
query_event = Kernel::Event::Create(Kernel::ResetType::OneShot, "NVDRV::query_event");
auto& kernel = Core::System::GetInstance().Kernel();
query_event = Kernel::Event::Create(kernel, Kernel::ResetType::OneShot, "NVDRV::query_event");
}
} // namespace Service::Nvidia

View File

@@ -6,14 +6,16 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
namespace Service {
namespace NVFlinger {
BufferQueue::BufferQueue(u32 id, u64 layer_id) : id(id), layer_id(layer_id) {
auto& kernel = Core::System::GetInstance().Kernel();
buffer_wait_event =
Kernel::Event::Create(Kernel::ResetType::Sticky, "BufferQueue NativeHandle");
Kernel::Event::Create(kernel, Kernel::ResetType::Sticky, "BufferQueue NativeHandle");
}
void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) {

View File

@@ -17,6 +17,7 @@
#include "core/hle/service/nvdrv/nvdrv.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
#include "core/hle/service/nvflinger/nvflinger.h"
#include "core/perf_stats.h"
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
@@ -137,7 +138,7 @@ void NVFlinger::Compose() {
auto& system_instance = Core::System::GetInstance();
// There was no queued buffer to draw, render previous frame
system_instance.perf_stats.EndGameFrame();
system_instance.GetPerfStats().EndGameFrame();
system_instance.Renderer().SwapBuffers({});
continue;
}
@@ -161,7 +162,8 @@ void NVFlinger::Compose() {
Layer::Layer(u64 id, std::shared_ptr<BufferQueue> queue) : id(id), buffer_queue(std::move(queue)) {}
Display::Display(u64 id, std::string name) : id(id), name(std::move(name)) {
vsync_event = Kernel::Event::Create(Kernel::ResetType::Pulse, "Display VSync Event");
auto& kernel = Core::System::GetInstance().Kernel();
vsync_event = Kernel::Event::Create(kernel, Kernel::ResetType::Pulse, "Display VSync Event");
}
} // namespace Service::NVFlinger

View File

@@ -12,6 +12,7 @@
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/server_port.h"
#include "core/hle/kernel/thread.h"
@@ -107,19 +108,24 @@ void ServiceFrameworkBase::InstallAsService(SM::ServiceManager& service_manager)
void ServiceFrameworkBase::InstallAsNamedPort() {
ASSERT(port == nullptr);
auto& kernel = Core::System::GetInstance().Kernel();
SharedPtr<ServerPort> server_port;
SharedPtr<ClientPort> client_port;
std::tie(server_port, client_port) = ServerPort::CreatePortPair(max_sessions, service_name);
std::tie(server_port, client_port) =
ServerPort::CreatePortPair(kernel, max_sessions, service_name);
server_port->SetHleHandler(shared_from_this());
AddNamedPort(service_name, std::move(client_port));
kernel.AddNamedPort(service_name, std::move(client_port));
}
Kernel::SharedPtr<Kernel::ClientPort> ServiceFrameworkBase::CreatePort() {
ASSERT(port == nullptr);
auto& kernel = Core::System::GetInstance().Kernel();
Kernel::SharedPtr<Kernel::ServerPort> server_port;
Kernel::SharedPtr<Kernel::ClientPort> client_port;
std::tie(server_port, client_port) =
Kernel::ServerPort::CreatePortPair(max_sessions, service_name);
Kernel::ServerPort::CreatePortPair(kernel, max_sessions, service_name);
port = MakeResult<Kernel::SharedPtr<Kernel::ServerPort>>(std::move(server_port)).Unwrap();
port->SetHleHandler(shared_from_this());
return client_port;
@@ -192,11 +198,6 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co
////////////////////////////////////////////////////////////////////////////////////////////////////
// Module interface
// TODO(yuriks): Move to kernel
void AddNamedPort(std::string name, SharedPtr<ClientPort> port) {
g_kernel_named_ports.emplace(std::move(name), std::move(port));
}
/// Initialize ServiceManager
void Init(std::shared_ptr<SM::ServiceManager>& sm, const FileSys::VirtualFilesystem& rfs) {
// NVFlinger needs to be accessed by several services like Vi and AppletOE so we instantiate it
@@ -259,7 +260,6 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm, const FileSys::VirtualFilesys
/// Shutdown ServiceManager
void Shutdown() {
g_kernel_named_ports.clear();
LOG_DEBUG(Service, "shutdown OK");
}
} // namespace Service
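This is the service-side half of retiring the global named-port map: AddNamedPort and g_kernel_named_ports disappear, and ports are registered on the kernel instance instead. A standalone sketch of an instance-owned registry standing in for the old global (ClientPort here is just a placeholder):

#include <memory>
#include <string>
#include <unordered_map>

struct ClientPort {};  // placeholder for Kernel::ClientPort

class KernelCore {
public:
    // Replaces the old free function AddNamedPort plus the global g_kernel_named_ports.
    void AddNamedPort(std::string name, std::shared_ptr<ClientPort> port) {
        named_ports.emplace(std::move(name), std::move(port));
    }

    std::shared_ptr<ClientPort> FindNamedPort(const std::string& name) const {
        const auto it = named_ports.find(name);
        return it != named_ports.end() ? it->second : nullptr;
    }

    // Shutdown no longer has to clear a global; the map dies with the kernel instance.
    void Shutdown() { named_ports.clear(); }

private:
    std::unordered_map<std::string, std::shared_ptr<ClientPort>> named_ports;
};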

View File

@@ -6,7 +6,6 @@
#include <cstddef>
#include <string>
#include <unordered_map>
#include <boost/container/flat_map.hpp>
#include "common/common_types.h"
#include "core/hle/kernel/hle_ipc.h"
@@ -187,10 +186,4 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm,
/// Shutdown ServiceManager
void Shutdown();
/// Map of named ports managed by the kernel, which can be retrieved using the ConnectToPort SVC.
extern std::unordered_map<std::string, Kernel::SharedPtr<Kernel::ClientPort>> g_kernel_named_ports;
/// Adds a port to the named port table
void AddNamedPort(std::string name, Kernel::SharedPtr<Kernel::ClientPort> port);
} // namespace Service

View File

@@ -4,6 +4,7 @@
#include <tuple>
#include "common/assert.h"
#include "core/core.h"
#include "core/hle/ipc_helpers.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/client_session.h"
@@ -47,9 +48,11 @@ ResultVal<Kernel::SharedPtr<Kernel::ServerPort>> ServiceManager::RegisterService
if (registered_services.find(name) != registered_services.end())
return ERR_ALREADY_REGISTERED;
auto& kernel = Core::System::GetInstance().Kernel();
Kernel::SharedPtr<Kernel::ServerPort> server_port;
Kernel::SharedPtr<Kernel::ClientPort> client_port;
std::tie(server_port, client_port) = Kernel::ServerPort::CreatePortPair(max_sessions, name);
std::tie(server_port, client_port) =
Kernel::ServerPort::CreatePortPair(kernel, max_sessions, name);
registered_services.emplace(std::move(name), std::move(client_port));
return MakeResult<Kernel::SharedPtr<Kernel::ServerPort>>(std::move(server_port));

View File

@@ -6,10 +6,12 @@
#include "common/common_funcs.h"
#include "common/file_util.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/file_sys/content_archive.h"
#include "core/file_sys/control_metadata.h"
#include "core/file_sys/romfs_factory.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/hle/service/filesystem/filesystem.h"
@@ -117,10 +119,11 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(
}
}
auto& kernel = Core::System::GetInstance().Kernel();
process->program_id = metadata.GetTitleID();
process->svc_access_mask.set();
process->resource_limit =
Kernel::ResourceLimit::GetForCategory(Kernel::ResourceLimitCategory::APPLICATION);
kernel.ResourceLimitForCategory(Kernel::ResourceLimitCategory::APPLICATION);
process->Run(Memory::PROCESS_IMAGE_VADDR, metadata.GetMainThreadPriority(),
metadata.GetMainThreadStackSize());

View File

@@ -9,6 +9,8 @@
#include "common/common_types.h"
#include "common/file_util.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/loader/elf.h"
@@ -300,7 +302,8 @@ SharedPtr<CodeSet> ElfReader::LoadInto(u32 vaddr) {
std::vector<u8> program_image(total_image_size);
size_t current_image_position = 0;
SharedPtr<CodeSet> codeset = CodeSet::Create("");
auto& kernel = Core::System::GetInstance().Kernel();
SharedPtr<CodeSet> codeset = CodeSet::Create(kernel, "");
for (unsigned int i = 0; i < header->e_phnum; ++i) {
Elf32_Phdr* p = &segments[i];
@@ -400,8 +403,9 @@ ResultStatus AppLoader_ELF::Load(Kernel::SharedPtr<Kernel::Process>& process) {
process->svc_access_mask.set();
// Attach the default resource limit (APPLICATION) to the process
auto& kernel = Core::System::GetInstance().Kernel();
process->resource_limit =
Kernel::ResourceLimit::GetForCategory(Kernel::ResourceLimitCategory::APPLICATION);
kernel.ResourceLimitForCategory(Kernel::ResourceLimitCategory::APPLICATION);
process->Run(codeset->entrypoint, 48, Memory::DEFAULT_STACK_SIZE);

View File

@@ -5,9 +5,9 @@
#include <memory>
#include <ostream>
#include <string>
#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/string_util.h"
#include "core/file_sys/vfs_real.h"
#include "core/hle/kernel/process.h"
#include "core/loader/deconstructed_rom_directory.h"
#include "core/loader/elf.h"
@@ -144,6 +144,9 @@ std::ostream& operator<<(std::ostream& os, ResultStatus status) {
return os;
}
AppLoader::AppLoader(FileSys::VirtualFile file) : file(std::move(file)) {}
AppLoader::~AppLoader() = default;
/**
* Get a loader for a file with a specific type
* @param file The file to load

View File

@@ -4,7 +4,6 @@
#pragma once
#include <algorithm>
#include <iosfwd>
#include <memory>
#include <string>
@@ -12,7 +11,6 @@
#include <vector>
#include <boost/optional.hpp>
#include "common/common_types.h"
#include "common/file_util.h"
#include "core/file_sys/vfs.h"
#include "core/hle/kernel/object.h"
@@ -114,8 +112,8 @@ std::ostream& operator<<(std::ostream& os, ResultStatus status);
/// Interface for loading an application
class AppLoader : NonCopyable {
public:
explicit AppLoader(FileSys::VirtualFile file) : file(std::move(file)) {}
virtual ~AppLoader() {}
explicit AppLoader(FileSys::VirtualFile file);
virtual ~AppLoader();
/**
* Returns the type of this file
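Defining AppLoader's constructor and destructor out of line is what lets the header get by with forward declarations; the complete member types are only needed in the one translation unit that defines the special members. The same pattern in its most common form, with hypothetical Widget/Impl names:

#include <memory>

// widget.h -- includers only ever see a forward declaration of Impl.
class Impl;

class Widget {
public:
    Widget();
    ~Widget();  // declared here, defined where Impl is complete

private:
    std::unique_ptr<Impl> impl;  // fine with an incomplete Impl at this point
};

// widget.cpp -- the single translation unit that needs the complete type.
class Impl {
public:
    int value = 0;
};

Widget::Widget() : impl(std::make_unique<Impl>()) {}
Widget::~Widget() = default;  // ~unique_ptr<Impl> is instantiated here, where Impl is complete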

View File

@@ -14,6 +14,7 @@
#include "core/file_sys/control_metadata.h"
#include "core/file_sys/vfs_offset.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/loader/nro.h"
@@ -136,7 +137,8 @@ bool AppLoader_NRO::LoadNro(FileSys::VirtualFile file, VAddr load_base) {
}
// Build program image
Kernel::SharedPtr<Kernel::CodeSet> codeset = Kernel::CodeSet::Create("");
auto& kernel = Core::System::GetInstance().Kernel();
Kernel::SharedPtr<Kernel::CodeSet> codeset = Kernel::CodeSet::Create(kernel, "");
std::vector<u8> program_image = file->ReadBytes(PageAlignSize(nro_header.file_size));
if (program_image.size() != PageAlignSize(nro_header.file_size)) {
return {};
@@ -185,9 +187,10 @@ ResultStatus AppLoader_NRO::Load(Kernel::SharedPtr<Kernel::Process>& process) {
return ResultStatus::ErrorLoadingNRO;
}
auto& kernel = Core::System::GetInstance().Kernel();
process->svc_access_mask.set();
process->resource_limit =
Kernel::ResourceLimit::GetForCategory(Kernel::ResourceLimitCategory::APPLICATION);
kernel.ResourceLimitForCategory(Kernel::ResourceLimitCategory::APPLICATION);
process->Run(base_addr, THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE);
is_loaded = true;

View File

@@ -11,6 +11,7 @@
#include "common/swap.h"
#include "core/core.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/loader/nso.h"
@@ -100,7 +101,8 @@ VAddr AppLoader_NSO::LoadModule(FileSys::VirtualFile file, VAddr load_base) {
return {};
// Build program image
Kernel::SharedPtr<Kernel::CodeSet> codeset = Kernel::CodeSet::Create("");
auto& kernel = Core::System::GetInstance().Kernel();
Kernel::SharedPtr<Kernel::CodeSet> codeset = Kernel::CodeSet::Create(kernel, "");
std::vector<u8> program_image;
for (std::size_t i = 0; i < nso_header.segments.size(); ++i) {
const std::vector<u8> compressed_data =
@@ -151,9 +153,10 @@ ResultStatus AppLoader_NSO::Load(Kernel::SharedPtr<Kernel::Process>& process) {
LoadModule(file, Memory::PROCESS_IMAGE_VADDR);
LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", file->GetName(), Memory::PROCESS_IMAGE_VADDR);
auto& kernel = Core::System::GetInstance().Kernel();
process->svc_access_mask.set();
process->resource_limit =
Kernel::ResourceLimit::GetForCategory(Kernel::ResourceLimitCategory::APPLICATION);
kernel.ResourceLimitForCategory(Kernel::ResourceLimitCategory::APPLICATION);
process->Run(Memory::PROCESS_IMAGE_VADDR, THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE);
is_loaded = true;

View File

@@ -251,8 +251,8 @@ std::string ReadCString(VAddr vaddr, std::size_t max_length) {
return string;
}
void RasterizerMarkRegionCached(Tegra::GPUVAddr gpu_addr, u64 size, bool cached) {
if (gpu_addr == 0) {
void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
if (vaddr == 0) {
return;
}
@@ -261,19 +261,8 @@ void RasterizerMarkRegionCached(Tegra::GPUVAddr gpu_addr, u64 size, bool cached)
// CPU pages, hence why we iterate on a CPU page basis (note: GPU page size is different). This
// assumes the specified GPU address region is contiguous as well.
u64 num_pages = ((gpu_addr + size - 1) >> PAGE_BITS) - (gpu_addr >> PAGE_BITS) + 1;
for (unsigned i = 0; i < num_pages; ++i, gpu_addr += PAGE_SIZE) {
boost::optional<VAddr> maybe_vaddr =
Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
// The GPU <-> CPU virtual memory mapping is not 1:1
if (!maybe_vaddr) {
LOG_ERROR(HW_Memory,
"Trying to flush a cached region to an invalid physical address {:016X}",
gpu_addr);
continue;
}
VAddr vaddr = *maybe_vaddr;
u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1;
for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) {
PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
if (cached) {
@@ -344,29 +333,19 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
const VAddr overlap_start = std::max(start, region_start);
const VAddr overlap_end = std::min(end, region_end);
const std::vector<Tegra::GPUVAddr> gpu_addresses =
system_instance.GPU().MemoryManager().CpuToGpuAddress(overlap_start);
if (gpu_addresses.empty()) {
return;
}
const u64 overlap_size = overlap_end - overlap_start;
for (const auto& gpu_address : gpu_addresses) {
auto& rasterizer = system_instance.Renderer().Rasterizer();
switch (mode) {
case FlushMode::Flush:
rasterizer.FlushRegion(gpu_address, overlap_size);
break;
case FlushMode::Invalidate:
rasterizer.InvalidateRegion(gpu_address, overlap_size);
break;
case FlushMode::FlushAndInvalidate:
rasterizer.FlushAndInvalidateRegion(gpu_address, overlap_size);
break;
}
auto& rasterizer = system_instance.Renderer().Rasterizer();
switch (mode) {
case FlushMode::Flush:
rasterizer.FlushRegion(overlap_start, overlap_size);
break;
case FlushMode::Invalidate:
rasterizer.InvalidateRegion(overlap_start, overlap_size);
break;
case FlushMode::FlushAndInvalidate:
rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size);
break;
}
};
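With CPU virtual addresses in play, the cached-page walk is plain arithmetic over PAGE_BITS and no longer needs a per-page GpuToCpuAddress translation. The page-count formula used above, exercised on its own (PAGE_BITS = 12, i.e. 4 KiB pages, is an assumption of the sketch; the formula also presumes a non-zero size):

#include <cstdint>
#include <cstdio>

constexpr std::uint64_t PAGE_BITS = 12;                 // assumed 4 KiB pages
constexpr std::uint64_t PAGE_SIZE = 1ULL << PAGE_BITS;

// Number of pages touched by [vaddr, vaddr + size): same formula as the code above.
constexpr std::uint64_t PagesTouched(std::uint64_t vaddr, std::uint64_t size) {
    return ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1;
}

int main() {
    // A 1-byte region entirely inside one page touches exactly one page...
    std::printf("%llu\n", static_cast<unsigned long long>(PagesTouched(0x1010, 1)));  // 1
    // ...while a region straddling a page boundary touches two.
    std::printf("%llu\n", static_cast<unsigned long long>(PagesTouched(0x1FFF, 2)));  // 2
    // Iterating then advances vaddr by PAGE_SIZE per step, as in the loop above.
    for (std::uint64_t vaddr = 0x1000, left = PagesTouched(0x1000, 0x2800); left--;
         vaddr += PAGE_SIZE) {
        std::printf("page base %#llx\n", static_cast<unsigned long long>(vaddr));
    }
}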

View File

@@ -11,7 +11,6 @@
#include <boost/icl/interval_map.hpp>
#include "common/common_types.h"
#include "core/memory_hook.h"
#include "video_core/memory_manager.h"
namespace Kernel {
class Process;
@@ -179,7 +178,7 @@ enum class FlushMode {
/**
* Mark each page touching the region as cached.
*/
void RasterizerMarkRegionCached(Tegra::GPUVAddr gpu_addr, u64 size, bool cached);
void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached);
/**
* Flushes and invalidates any externally cached rasterizer resources touching the given virtual

View File

@@ -40,7 +40,7 @@ void PerfStats::EndGameFrame() {
game_frames += 1;
}
PerfStats::Results PerfStats::GetAndResetStats(microseconds current_system_time_us) {
PerfStatsResults PerfStats::GetAndResetStats(microseconds current_system_time_us) {
std::lock_guard<std::mutex> lock(object_mutex);
const auto now = Clock::now();
@@ -49,7 +49,7 @@ PerfStats::Results PerfStats::GetAndResetStats(microseconds current_system_time_
const auto system_us_per_second = (current_system_time_us - reset_point_system_us) / interval;
Results results{};
PerfStatsResults results{};
results.system_fps = static_cast<double>(system_frames) / interval;
results.game_fps = static_cast<double>(game_frames) / interval;
results.frametime = duration_cast<DoubleSecs>(accumulated_frametime).count() /

View File

@@ -10,6 +10,17 @@
namespace Core {
struct PerfStatsResults {
/// System FPS (LCD VBlanks) in Hz
double system_fps;
/// Game FPS (GSP frame submissions) in Hz
double game_fps;
/// Walltime per system frame, in seconds, excluding any waits
double frametime;
/// Ratio of walltime / emulated time elapsed
double emulation_speed;
};
/**
* Class to manage and query performance/timing statistics. All public functions of this class are
* thread-safe unless stated otherwise.
@@ -18,22 +29,11 @@ class PerfStats {
public:
using Clock = std::chrono::high_resolution_clock;
struct Results {
/// System FPS (LCD VBlanks) in Hz
double system_fps;
/// Game FPS (GSP frame submissions) in Hz
double game_fps;
/// Walltime per system frame, in seconds, excluding any waits
double frametime;
/// Ratio of walltime / emulated time elapsed
double emulation_speed;
};
void BeginSystemFrame();
void EndSystemFrame();
void EndGameFrame();
Results GetAndResetStats(std::chrono::microseconds current_system_time_us);
PerfStatsResults GetAndResetStats(std::chrono::microseconds current_system_time_us);
/**
* Gets the ratio between walltime and the emulated time of the previous system frame. This is
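Hoisting the nested PerfStats::Results struct out to PerfStatsResults lets other headers forward-declare the result type instead of including the whole class; a nested type can never be forward-declared from outside its enclosing class. A small sketch of the before/after (names are invented):

#include <cstdio>

// Before: a nested Stats::Results could not be forward-declared, so every
// consumer of the result type had to include the full Stats definition.
// After: the result lives at namespace scope, and a declaration is enough
// for interfaces that only pass it around.

struct StatsResults;  // forward declaration

void PrintResults(const StatsResults& results);  // usable with the declaration alone

// The definition lives in one header, next to the class that produces it.
struct StatsResults {
    double fps;
    double frametime;
};

class Stats {
public:
    StatsResults GetAndReset() { return StatsResults{60.0, 1.0 / 60.0}; }
};

void PrintResults(const StatsResults& results) {
    std::printf("%.1f fps, %.4f s/frame\n", results.fps, results.frametime);
}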

View File

@@ -7,6 +7,7 @@
#include "common/file_util.h"
#include "core/core.h"
#include "core/loader/loader.h"
#include "core/settings.h"
#include "core/telemetry_session.h"

View File

@@ -13,7 +13,7 @@ namespace ArmTests {
TestEnvironment::TestEnvironment(bool mutable_memory_)
: mutable_memory(mutable_memory_), test_memory(std::make_shared<TestMemory>(this)) {
Core::CurrentProcess() = Kernel::Process::Create("");
Core::CurrentProcess() = Kernel::Process::Create(kernel, "");
page_table = &Core::CurrentProcess()->vm_manager.page_table;
page_table->pointers.fill(nullptr);

View File

@@ -9,6 +9,7 @@
#include <vector>
#include "common/common_types.h"
#include "core/hle/kernel/kernel.h"
#include "core/memory_hook.h"
namespace Memory {
@@ -86,6 +87,7 @@ private:
std::shared_ptr<TestMemory> test_memory;
std::vector<WriteRecord> write_records;
Memory::PageTable* page_table = nullptr;
Kernel::KernelCore kernel;
};
} // namespace ArmTests

View File

@@ -34,6 +34,8 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params)
"{:08X} remaining params {}",
method, subchannel, value, remaining_params);
ASSERT(subchannel < bound_engines.size());
if (method == static_cast<u32>(BufferMethods::BindObject)) {
// Bind the current subchannel to the desired engine id.
LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value);
@@ -47,8 +49,6 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params)
return;
}
ASSERT(bound_engines.find(subchannel) != bound_engines.end());
const EngineID engine = bound_engines[subchannel];
switch (engine) {

View File

@@ -5,13 +5,13 @@
#include <cinttypes>
#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/memory.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
#include "video_core/textures/decoders.h"
#include "video_core/textures/texture.h"
#include "video_core/video_core.h"
namespace Tegra {
namespace Engines {
@@ -195,8 +195,8 @@ void Maxwell3D::ProcessQueryGet() {
// wait queues.
LongQueryResult query_result{};
query_result.value = result;
// TODO(Subv): Generate a real GPU timestamp and write it here instead of 0
query_result.timestamp = 0;
// TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
query_result.timestamp = CoreTiming::GetTicks();
Memory::WriteBlock(*address, &query_result, sizeof(query_result));
}
break;

View File

@@ -147,6 +147,7 @@ enum class PredCondition : u64 {
LessThanWithNan = 9,
GreaterThanWithNan = 12,
NotEqualWithNan = 13,
GreaterEqualWithNan = 14,
// TODO(Subv): Other condition types
};
@@ -242,6 +243,9 @@ enum class TextureType : u64 {
TextureCube = 3,
};
enum class IpaInterpMode : u64 { Linear = 0, Perspective = 1, Flat = 2, Sc = 3 };
enum class IpaSampleMode : u64 { Default = 0, Centroid = 1, Offset = 2 };
union Instruction {
Instruction& operator=(const Instruction& instr) {
value = instr.value;
@@ -325,6 +329,16 @@ union Instruction {
} alu;
union {
BitField<51, 1, u64> saturate;
BitField<52, 2, IpaSampleMode> sample_mode;
BitField<54, 2, IpaInterpMode> interp_mode;
} ipa;
union {
BitField<39, 2, u64> tab5cb8_2;
BitField<41, 3, u64> tab5c68_1;
BitField<44, 2, u64> tab5c68_0;
BitField<47, 1, u64> cc;
BitField<48, 1, u64> negate_b;
} fmul;
@@ -338,6 +352,10 @@ union Instruction {
BitField<49, 1, u64> negate_a;
} alu_integer;
union {
BitField<40, 1, u64> invert;
} popc;
union {
BitField<39, 3, u64> pred;
BitField<42, 1, u64> neg_pred;
@@ -388,8 +406,11 @@ union Instruction {
} flow;
union {
BitField<47, 1, u64> cc;
BitField<48, 1, u64> negate_b;
BitField<49, 1, u64> negate_c;
BitField<51, 2, u64> tab5980_1;
BitField<53, 2, u64> tab5980_0;
} ffma;
union {
@@ -498,6 +519,7 @@ union Instruction {
union {
BitField<0, 8, Register> gpr0;
BitField<28, 8, Register> gpr28;
BitField<49, 1, u64> nodep;
BitField<50, 3, u64> component_mask_selector;
BitField<53, 4, u64> texture_info;
@@ -665,6 +687,9 @@ public:
ISCADD_C, // Scale and Add
ISCADD_R,
ISCADD_IMM,
POPC_C,
POPC_R,
POPC_IMM,
SEL_C,
SEL_R,
SEL_IMM,
@@ -886,6 +911,9 @@ private:
INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"),
INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"),
INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"),
INST("0100110000001---", Id::POPC_C, Type::ArithmeticInteger, "POPC_C"),
INST("0101110000001---", Id::POPC_R, Type::ArithmeticInteger, "POPC_R"),
INST("0011100-00001---", Id::POPC_IMM, Type::ArithmeticInteger, "POPC_IMM"),
INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"),
INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"),
INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"),

View File

@@ -4,8 +4,8 @@
#pragma once
#include <array>
#include <memory>
#include <unordered_map>
#include "common/common_types.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
#include "video_core/memory_manager.h"
@@ -136,7 +136,7 @@ private:
std::unique_ptr<Tegra::MemoryManager> memory_manager;
/// Mapping of command subchannels to their bound engine ids.
std::unordered_map<u32, EngineID> bound_engines;
std::array<EngineID, 8> bound_engines = {};
/// 3D engine
std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
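Since subchannel is a 3-bit field, the bound-engine table can be a fixed std::array of eight slots indexed directly, with the old find()-based existence check reduced to the bounds ASSERT seen in WriteReg. Sketched standalone (the EngineID values here are made up):

#include <array>
#include <cassert>
#include <cstdint>
#include <cstdio>

enum class EngineID : std::uint32_t {
    None = 0,
    Maxwell3D,
    MaxwellDMA,
};

std::array<EngineID, 8> bound_engines{};  // 3-bit subchannel -> at most 8 slots

void BindSubchannel(std::uint32_t subchannel, EngineID engine) {
    assert(subchannel < bound_engines.size());
    bound_engines[subchannel] = engine;  // direct index; no hashing, no allocation
}

EngineID EngineFor(std::uint32_t subchannel) {
    assert(subchannel < bound_engines.size());
    return bound_engines[subchannel];
}

int main() {
    BindSubchannel(1, EngineID::Maxwell3D);
    std::printf("subchannel 1 -> engine %u\n", static_cast<unsigned>(EngineFor(1)));
}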

View File

@@ -4,113 +4,86 @@
#pragma once
#include <unordered_map>
#include <set>
#include <boost/icl/interval_map.hpp>
#include <boost/range/iterator_range.hpp>
#include <boost/range/iterator_range_core.hpp>
#include "common/common_types.h"
#include "core/memory.h"
#include "video_core/memory_manager.h"
#include "core/core.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
template <class T>
class RasterizerCache : NonCopyable {
public:
/// Mark the specified region as being invalidated
void InvalidateRegion(Tegra::GPUVAddr region_addr, size_t region_size) {
for (auto iter = cached_objects.cbegin(); iter != cached_objects.cend();) {
const auto& object{iter->second};
void InvalidateRegion(VAddr addr, u64 size) {
if (size == 0)
return;
++iter;
const ObjectInterval interval{addr, addr + size};
for (auto& pair : boost::make_iterator_range(object_cache.equal_range(interval))) {
for (auto& cached_object : pair.second) {
if (!cached_object)
continue;
if (object->GetAddr() <= (region_addr + region_size) &&
region_addr <= (object->GetAddr() + object->GetSizeInBytes())) {
// Regions overlap, so invalidate
Unregister(object);
remove_objects.emplace(cached_object);
}
}
for (auto& remove_object : remove_objects) {
Unregister(remove_object);
}
remove_objects.clear();
}
/// Invalidates everything in the cache
void InvalidateAll() {
while (object_cache.begin() != object_cache.end()) {
Unregister(*object_cache.begin()->second.begin());
}
}
protected:
/// Tries to get an object from the cache with the specified address
T TryGet(Tegra::GPUVAddr addr) const {
const auto& search{cached_objects.find(addr)};
if (search != cached_objects.end()) {
return search->second;
T TryGet(VAddr addr) const {
const ObjectInterval interval{addr};
for (auto& pair : boost::make_iterator_range(object_cache.equal_range(interval))) {
for (auto& cached_object : pair.second) {
if (cached_object->GetAddr() == addr) {
return cached_object;
}
}
}
return nullptr;
}
/// Gets a reference to the cache
const std::unordered_map<Tegra::GPUVAddr, T>& GetCache() const {
return cached_objects;
}
/// Register an object into the cache
void Register(const T& object) {
const auto& search{cached_objects.find(object->GetAddr())};
if (search != cached_objects.end()) {
// Registered already
return;
}
cached_objects[object->GetAddr()] = object;
UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1);
object_cache.add({GetInterval(object), ObjectSet{object}});
auto& rasterizer = Core::System::GetInstance().Renderer().Rasterizer();
rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1);
}
/// Unregisters an object from the cache
void Unregister(const T& object) {
const auto& search{cached_objects.find(object->GetAddr())};
if (search == cached_objects.end()) {
// Unregistered already
return;
}
UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
cached_objects.erase(search);
auto& rasterizer = Core::System::GetInstance().Renderer().Rasterizer();
rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
object_cache.subtract({GetInterval(object), ObjectSet{object}});
}
private:
using PageMap = boost::icl::interval_map<u64, int>;
using ObjectSet = std::set<T>;
using ObjectCache = boost::icl::interval_map<VAddr, ObjectSet>;
using ObjectInterval = typename ObjectCache::interval_type;
template <typename Map, typename Interval>
constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
return boost::make_iterator_range(map.equal_range(interval));
static auto GetInterval(const T& object) {
return ObjectInterval::right_open(object->GetAddr(),
object->GetAddr() + object->GetSizeInBytes());
}
/// Increase/decrease the number of object in pages touching the specified region
void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
const u64 page_start{addr >> Tegra::MemoryManager::PAGE_BITS};
const u64 page_end{(addr + size) >> Tegra::MemoryManager::PAGE_BITS};
// Interval maps will erase segments if count reaches 0, so if delta is negative we have to
// subtract after iterating
const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end);
if (delta > 0)
cached_pages.add({pages_interval, delta});
for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) {
const auto interval = pair.first & pages_interval;
const int count = pair.second;
const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval)
<< Tegra::MemoryManager::PAGE_BITS;
const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval)
<< Tegra::MemoryManager::PAGE_BITS;
const u64 interval_size = interval_end_addr - interval_start_addr;
if (delta > 0 && count == delta)
Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true);
else if (delta < 0 && count == -delta)
Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false);
else
ASSERT(count >= 0);
}
if (delta < 0)
cached_pages.add({pages_interval, delta});
}
std::unordered_map<Tegra::GPUVAddr, T> cached_objects;
PageMap cached_pages;
ObjectCache object_cache;
ObjectSet remove_objects;
};
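The rewritten cache keys objects by their CPU address interval, so overlap queries become interval_map lookups instead of scans over a flat unordered_map. A trimmed-down standalone version of the same structure, with a hypothetical Entry type standing in for cached surfaces or shaders:

#include <cstdint>
#include <memory>
#include <set>
#include <boost/icl/interval_map.hpp>
#include <boost/range/iterator_range.hpp>

struct Entry {
    std::uint64_t addr;
    std::uint64_t size;
};
using EntryPtr = std::shared_ptr<Entry>;

class IntervalCache {
public:
    void Register(const EntryPtr& entry) {
        cache.add({IntervalFor(entry), EntrySet{entry}});
    }

    void Unregister(const EntryPtr& entry) {
        cache.subtract({IntervalFor(entry), EntrySet{entry}});
    }

    // Returns every entry overlapping [addr, addr + size).
    std::set<EntryPtr> FindOverlapping(std::uint64_t addr, std::uint64_t size) const {
        std::set<EntryPtr> found;
        const auto query = Interval::right_open(addr, addr + size);
        for (const auto& pair : boost::make_iterator_range(cache.equal_range(query))) {
            found.insert(pair.second.begin(), pair.second.end());
        }
        return found;
    }

private:
    using EntrySet = std::set<EntryPtr>;
    using Cache = boost::icl::interval_map<std::uint64_t, EntrySet>;
    using Interval = Cache::interval_type;

    static Interval IntervalFor(const EntryPtr& entry) {
        return Interval::right_open(entry->addr, entry->addr + entry->size);
    }

    Cache cache;
};

InvalidateRegion in the cache above is this same overlap query followed by Unregister on each hit, with the hits collected into a scratch set first so the map is not mutated while it is being iterated.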

View File

@@ -27,14 +27,14 @@ public:
virtual void FlushAll() = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
virtual void FlushRegion(Tegra::GPUVAddr addr, u64 size) = 0;
virtual void FlushRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be invalidated
virtual void InvalidateRegion(Tegra::GPUVAddr addr, u64 size) = 0;
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
/// and invalidated
virtual void FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) = 0;
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
/// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0
virtual bool AccelerateDisplayTransfer(const void* config) {
@@ -60,5 +60,8 @@ public:
virtual bool AccelerateDrawBatch(bool is_indexed) {
return false;
}
/// Increase/decrease the number of object in pages touching the specified region
virtual void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {}
};
} // namespace VideoCore

View File

@@ -274,6 +274,41 @@ bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
return true;
}
template <typename Map, typename Interval>
static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
return boost::make_iterator_range(map.equal_range(interval));
}
void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
const u64 page_start{addr >> Memory::PAGE_BITS};
const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
// Interval maps will erase segments if count reaches 0, so if delta is negative we have to
// subtract after iterating
const auto pages_interval = CachedPageMap::interval_type::right_open(page_start, page_end);
if (delta > 0)
cached_pages.add({pages_interval, delta});
for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) {
const auto interval = pair.first & pages_interval;
const int count = pair.second;
const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS;
const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS;
const u64 interval_size = interval_end_addr - interval_start_addr;
if (delta > 0 && count == delta)
Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true);
else if (delta < 0 && count == -delta)
Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false);
else
ASSERT(count >= 0);
}
if (delta < 0)
cached_pages.add({pages_interval, delta});
}
std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb,
bool using_depth_fb,
bool preserve_contents) {
@@ -397,16 +432,6 @@ void RasterizerOpenGL::Clear() {
glClearStencil(regs.clear_stencil);
glClear(clear_mask);
// Mark framebuffer surfaces as dirty
if (Settings::values.use_accurate_framebuffers) {
if (dirty_color_surface != nullptr) {
res_cache.FlushSurface(dirty_color_surface);
}
if (dirty_depth_surface != nullptr) {
res_cache.FlushSurface(dirty_depth_surface);
}
}
}
std::pair<u8*, GLintptr> RasterizerOpenGL::AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset,
@@ -522,16 +547,6 @@ void RasterizerOpenGL::DrawArrays() {
texture_unit.Unbind();
}
state.Apply();
// Mark framebuffer surfaces as dirty
if (Settings::values.use_accurate_framebuffers) {
if (dirty_color_surface != nullptr) {
res_cache.FlushSurface(dirty_color_surface);
}
if (dirty_depth_surface != nullptr) {
res_cache.FlushSurface(dirty_depth_surface);
}
}
}
void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}
@@ -540,17 +555,17 @@ void RasterizerOpenGL::FlushAll() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
}
void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
}
void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.InvalidateRegion(addr, size);
shader_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
InvalidateRegion(addr, size);
}
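UpdatePagesCachedCount treats an interval_map<u64, int> as a per-page reference count: overlapping additions sum, and a page is only (un)marked as cached when its count crosses between zero and the applied delta. The comment about adding before or after the loop matters because ICL erases segments whose value reaches zero. The counting behaviour in isolation:

#include <cstdint>
#include <cstdio>
#include <boost/icl/interval_map.hpp>

using PageMap = boost::icl::interval_map<std::uint64_t, int>;
using Interval = PageMap::interval_type;

int main() {
    PageMap cached_pages;

    // Two cached objects whose page ranges partially overlap.
    cached_pages.add({Interval::right_open(10, 20), 1});
    cached_pages.add({Interval::right_open(15, 25), 1});

    // ICL splits the map into segments with summed counts:
    //   [10,15) -> 1, [15,20) -> 2, [20,25) -> 1
    for (const auto& segment : cached_pages) {
        std::printf("[%llu, %llu) -> %d\n",
                    static_cast<unsigned long long>(boost::icl::first(segment.first)),
                    static_cast<unsigned long long>(boost::icl::last_next(segment.first)),
                    segment.second);
    }

    // Subtracting a region back to zero erases its segments entirely, which is
    // why the real code applies a negative delta only after inspecting the map.
    cached_pages.add({Interval::right_open(15, 25), -1});
    // The two remaining equal-valued neighbours join back into [10,20) -> 1.
    std::printf("segments left: %zu\n", cached_pages.iterative_size());  // 1
}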

View File

@@ -10,7 +10,11 @@
#include <tuple>
#include <utility>
#include <vector>
#include <boost/icl/interval_map.hpp>
#include <boost/range/iterator_range.hpp>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
@@ -40,15 +44,16 @@ public:
void Clear() override;
void NotifyMaxwellRegisterChanged(u32 method) override;
void FlushAll() override;
void FlushRegion(Tegra::GPUVAddr addr, u64 size) override;
void InvalidateRegion(Tegra::GPUVAddr addr, u64 size) override;
void FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
bool AccelerateDisplayTransfer(const void* config) override;
bool AccelerateTextureCopy(const void* config) override;
bool AccelerateFill(const void* config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
bool AccelerateDrawBatch(bool is_indexed) override;
void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override;
/// OpenGL shader generated for a given Maxwell register state
struct MaxwellShader {
@@ -187,6 +192,9 @@ private:
enum class AccelDraw { Disabled, Arrays, Indexed };
AccelDraw accelerate_draw = AccelDraw::Disabled;
using CachedPageMap = boost::icl::interval_map<u64, int>;
CachedPageMap cached_pages;
};
} // namespace OpenGL

View File

@@ -33,11 +33,16 @@ struct FormatTuple {
bool compressed;
};
static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
auto& gpu{Core::System::GetInstance().GPU()};
const auto cpu_addr{gpu.MemoryManager().GpuToCpuAddress(gpu_addr)};
return cpu_addr ? *cpu_addr : 0;
}
/*static*/ SurfaceParams SurfaceParams::CreateForTexture(
const Tegra::Texture::FullTextureInfo& config) {
SurfaceParams params{};
params.addr = config.tic.Address();
params.addr = TryGetCpuAddr(config.tic.Address());
params.is_tiled = config.tic.IsTiled();
params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
params.pixel_format =
@@ -55,9 +60,8 @@ struct FormatTuple {
/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config) {
SurfaceParams params{};
params.addr = config.Address();
params.addr = TryGetCpuAddr(config.Address());
params.is_tiled = true;
params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
@@ -75,9 +79,8 @@ struct FormatTuple {
/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
Tegra::GPUVAddr zeta_address,
Tegra::DepthFormat format) {
SurfaceParams params{};
params.addr = zeta_address;
params.addr = TryGetCpuAddr(zeta_address);
params.is_tiled = true;
params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
params.pixel_format = PixelFormatFromDepthFormat(format);
@@ -120,7 +123,11 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
true}, // DXN2UNORM
{GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM
{GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // BC7U
true}, // BC7U
{GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8,
ComponentType::UNorm, true}, // BC6H_UF16
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // BC6H_SF16
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // G8R8U
{GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // G8R8S
@@ -167,11 +174,6 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
return format;
}
VAddr SurfaceParams::GetCpuAddr() const {
auto& gpu = Core::System::GetInstance().GPU();
return *gpu.MemoryManager().GpuToCpuAddress(addr);
}
static bool IsPixelFormatASTC(PixelFormat format) {
switch (format) {
case PixelFormat::ASTC_2D_4X4:
@@ -210,39 +212,36 @@ static bool IsFormatBCn(PixelFormat format) {
case PixelFormat::DXN2SNORM:
case PixelFormat::DXN2UNORM:
case PixelFormat::BC7U:
case PixelFormat::BC6H_UF16:
case PixelFormat::BC6H_SF16:
return true;
}
return false;
}
template <bool morton_to_gl, PixelFormat format>
void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_buffer,
Tegra::GPUVAddr addr) {
void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_buffer, VAddr addr) {
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
auto& gpu = Core::System::GetInstance().GPU();
if (morton_to_gl) {
// With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
// pixel values.
const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
const std::vector<u8> data =
Tegra::Texture::UnswizzleTexture(*gpu.MemoryManager().GpuToCpuAddress(addr), tile_size,
bytes_per_pixel, stride, height, block_height);
const std::vector<u8> data = Tegra::Texture::UnswizzleTexture(
addr, tile_size, bytes_per_pixel, stride, height, block_height);
const size_t size_to_copy{std::min(gl_buffer.size(), data.size())};
gl_buffer.assign(data.begin(), data.begin() + size_to_copy);
} else {
// TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should
// check the configuration for this and perform more generic un/swizzle
LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
VideoCore::MortonCopyPixels128(
stride, height, bytes_per_pixel, gl_bytes_per_pixel,
Memory::GetPointer(*gpu.MemoryManager().GpuToCpuAddress(addr)), gl_buffer.data(),
morton_to_gl);
VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel,
Memory::GetPointer(addr), gl_buffer.data(), morton_to_gl);
}
}
static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr),
static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, VAddr),
SurfaceParams::MaxPixelFormat>
morton_to_gl_fns = {
// clang-format off
@@ -266,6 +265,8 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU
MortonCopy<true, PixelFormat::DXN2UNORM>,
MortonCopy<true, PixelFormat::DXN2SNORM>,
MortonCopy<true, PixelFormat::BC7U>,
MortonCopy<true, PixelFormat::BC6H_UF16>,
MortonCopy<true, PixelFormat::BC6H_SF16>,
MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
MortonCopy<true, PixelFormat::G8R8U>,
MortonCopy<true, PixelFormat::G8R8S>,
@@ -297,7 +298,7 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU
// clang-format on
};
static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr),
static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, VAddr),
SurfaceParams::MaxPixelFormat>
gl_to_morton_fns = {
// clang-format off
@@ -314,8 +315,10 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU
MortonCopy<false, PixelFormat::RGBA16UI>,
MortonCopy<false, PixelFormat::R11FG11FB10F>,
MortonCopy<false, PixelFormat::RGBA32UI>,
// TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/ASTC_2D_4X4 formats is not
// supported
// TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/BC6H_UF16/BC6H_SF16/ASTC_2D_4X4
// formats are not supported
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
@@ -532,7 +535,7 @@ MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64
void CachedSurface::LoadGLBuffer() {
ASSERT(params.type != SurfaceType::Fill);
const u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr());
const u8* const texture_src_data = Memory::GetPointer(params.addr);
ASSERT(texture_src_data);
@@ -557,7 +560,7 @@ void CachedSurface::LoadGLBuffer() {
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
void CachedSurface::FlushGLBuffer() {
u8* const dst_buffer = Memory::GetPointer(params.GetCpuAddr());
u8* const dst_buffer = Memory::GetPointer(params.addr);
ASSERT(dst_buffer);
ASSERT(gl_buffer.size() ==
@@ -754,19 +757,10 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
return {};
}
auto& gpu = Core::System::GetInstance().GPU();
// Don't try to create any entries in the cache if the address of the texture is invalid.
if (gpu.MemoryManager().GpuToCpuAddress(params.addr) == boost::none)
return {};
// Look up surface in the cache based on address
Surface surface{TryGet(params.addr)};
if (surface) {
if (Settings::values.use_accurate_framebuffers) {
// If use_accurate_framebuffers is enabled, always load from memory
FlushSurface(surface);
Unregister(surface);
} else if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
// Use the cached surface as-is
return surface;
} else if (preserve_contents) {
@@ -782,15 +776,9 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
}
}
// Try to get a previously reserved surface
surface = TryGetReservedSurface(params);
// No surface found - create a new one
if (!surface) {
surface = std::make_shared<CachedSurface>(params);
ReserveSurface(surface);
Register(surface);
}
// No cached surface found - get a new one
surface = GetUncachedSurface(params);
Register(surface);
// Only load surface from memory if we care about the contents
if (preserve_contents) {
@@ -800,13 +788,23 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
return surface;
}
Surface RasterizerCacheOpenGL::GetUncachedSurface(const SurfaceParams& params) {
Surface surface{TryGetReservedSurface(params)};
if (!surface) {
// No reserved surface available, create a new one and reserve it
surface = std::make_shared<CachedSurface>(params);
ReserveSurface(surface);
}
return surface;
}
Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
const SurfaceParams& new_params) {
// Verify surface is compatible for blitting
const auto& params{surface->GetSurfaceParams()};
// Create a new surface with the new parameters, and blit the previous surface to it
Surface new_surface{std::make_shared<CachedSurface>(new_params)};
// Get a new surface with the new parameters, and blit the previous surface to it
Surface new_surface{GetUncachedSurface(new_params)};
// If format is unchanged, we can do a faster blit without reinterpreting pixel data
if (params.pixel_format == new_params.pixel_format) {
@@ -816,92 +814,73 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
return new_surface;
}
auto source_format = GetFormatTuple(params.pixel_format, params.component_type);
auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type);
// When using accurate framebuffers, always copy old data to new surface, regardless of format
if (Settings::values.use_accurate_framebuffers) {
auto source_format = GetFormatTuple(params.pixel_format, params.component_type);
auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type);
size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes());
size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes());
// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
// using the new format.
OGLBuffer pbo;
pbo.Create();
// Use a Pixel Buffer Object to download the previous texture and then upload it to the new
// one using the new format.
OGLBuffer pbo;
pbo.Create();
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo.handle);
glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB);
if (source_format.compressed) {
glGetCompressedTextureImage(surface->Texture().handle, 0,
static_cast<GLsizei>(params.SizeInBytes()), nullptr);
} else {
glGetTextureImage(surface->Texture().handle, 0, source_format.format, source_format.type,
static_cast<GLsizei>(params.SizeInBytes()), nullptr);
}
// If the new texture is bigger than the previous one, we need to fill in the rest with data
// from the CPU.
if (params.SizeInBytes() < new_params.SizeInBytes()) {
// Upload the rest of the memory.
if (new_params.is_tiled) {
// TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest of
// the data in this case. Games like Super Mario Odyssey seem to hit this case when
// drawing, it re-uses the memory of a previous texture as a bigger framebuffer but it
// doesn't clear it beforehand, the texture is already full of zeros.
LOG_CRITICAL(HW_GPU, "Trying to upload extra texture data from the CPU during "
"reinterpretation but the texture is tiled.");
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo.handle);
glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB);
if (source_format.compressed) {
glGetCompressedTextureImage(surface->Texture().handle, 0,
static_cast<GLsizei>(params.SizeInBytes()), nullptr);
} else {
glGetTextureImage(surface->Texture().handle, 0, source_format.format,
source_format.type, static_cast<GLsizei>(params.SizeInBytes()),
nullptr);
}
size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes();
auto address = Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(
new_params.addr + params.SizeInBytes());
std::vector<u8> data(remaining_size);
Memory::ReadBlock(*address, data.data(), data.size());
glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size, data.data());
// If the new texture is bigger than the previous one, we need to fill in the rest with data
// from the CPU.
if (params.SizeInBytes() < new_params.SizeInBytes()) {
// Upload the rest of the memory.
if (new_params.is_tiled) {
// TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest
// of the data in this case. Games like Super Mario Odyssey seem to hit this case
// when drawing, it re-uses the memory of a previous texture as a bigger framebuffer
// but it doesn't clear it beforehand, the texture is already full of zeros.
LOG_CRITICAL(HW_GPU, "Trying to upload extra texture data from the CPU during "
"reinterpretation but the texture is tiled.");
}
size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes();
std::vector<u8> data(remaining_size);
Memory::ReadBlock(new_params.addr + params.SizeInBytes(), data.data(), data.size());
glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size,
data.data());
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
const auto& dest_rect{new_params.GetRect()};
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.handle);
if (dest_format.compressed) {
glCompressedTexSubImage2D(
GL_TEXTURE_2D, 0, 0, 0, static_cast<GLsizei>(dest_rect.GetWidth()),
static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format,
static_cast<GLsizei>(new_params.SizeInBytes()), nullptr);
} else {
glTextureSubImage2D(new_surface->Texture().handle, 0, 0, 0,
static_cast<GLsizei>(dest_rect.GetWidth()),
static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format,
dest_format.type, nullptr);
}
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
pbo.Release();
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
const auto& dest_rect{new_params.GetRect()};
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.handle);
if (dest_format.compressed) {
glCompressedTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0,
static_cast<GLsizei>(dest_rect.GetWidth()),
static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format,
static_cast<GLsizei>(new_params.SizeInBytes()), nullptr);
} else {
glTextureSubImage2D(new_surface->Texture().handle, 0, 0, 0,
static_cast<GLsizei>(dest_rect.GetWidth()),
static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format,
dest_format.type, nullptr);
}
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
pbo.Release();
return new_surface;
}
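When the reinterpreted surface is bigger than the cached one, only the tail past the old size has to be fetched from guest memory and appended to the PBO. A worked example of that arithmetic (a standalone sketch with made-up linear RGBA8 dimensions, not yuzu code):

#include <cstddef>
#include <cstdio>

int main() {
    // Hypothetical linear RGBA8 surfaces, 4 bytes per pixel.
    const std::size_t old_size = 256 * 256 * 4;        //  262144 bytes already copied via the PBO
    const std::size_t new_size = 512 * 512 * 4;        // 1048576 bytes wanted by the new surface
    const std::size_t remaining = new_size - old_size; //  786432 bytes read from guest memory,
                                                       //  starting at new_params.addr + old_size
    std::printf("upload %zu extra bytes at offset %zu\n", remaining, old_size);
}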
Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const {
// Tries to find the GPU address of a framebuffer based on the CPU address. This is because
// final output framebuffers are specified by CPU address, but internally our GPU cache uses
// GPU addresses. We iterate through all cached framebuffers, and compare their starting CPU
// address to the one provided. This is obviously not great, and won't work if the
// framebuffer overlaps surfaces.
std::vector<Surface> surfaces;
for (const auto& surface : GetCache()) {
const auto& params = surface.second->GetSurfaceParams();
const VAddr surface_cpu_addr = params.GetCpuAddr();
if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + params.size_in_bytes)) {
ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported");
surfaces.push_back(surface.second);
}
}
if (surfaces.empty()) {
return {};
}
ASSERT_MSG(surfaces.size() == 1, ">1 surface is unsupported");
return surfaces[0];
Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const {
return TryGet(addr);
}
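With surfaces now keyed by their CPU address, the framebuffer lookup reduces to the cache's plain TryGet. A minimal sketch of such an address-keyed lookup (illustrative only; the container and names below are assumptions, not yuzu's RasterizerCache):

#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>

using VAddr = std::uint64_t;

struct Surface {
    VAddr addr;
    std::size_t size_in_bytes;
};
using SurfacePtr = std::shared_ptr<Surface>;

class SurfaceCacheSketch {
public:
    void Register(const SurfacePtr& surface) {
        cache[surface->addr] = surface;
    }
    // Returns the surface whose base address matches exactly, or nullptr if none is cached.
    SurfacePtr TryGet(VAddr addr) const {
        const auto it = cache.find(addr);
        return it != cache.end() ? it->second : nullptr;
    }

private:
    std::map<VAddr, SurfacePtr> cache;
};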
void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) {
@@ -913,7 +892,6 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params
const auto& surface_reserve_key{SurfaceReserveKey::Create(params)};
auto search{surface_reserve.find(surface_reserve_key)};
if (search != surface_reserve.end()) {
Register(search->second);
return search->second;
}
return {};


@@ -45,42 +45,44 @@ struct SurfaceParams {
DXN2UNORM = 17,
DXN2SNORM = 18,
BC7U = 19,
ASTC_2D_4X4 = 20,
G8R8U = 21,
G8R8S = 22,
BGRA8 = 23,
RGBA32F = 24,
RG32F = 25,
R32F = 26,
R16F = 27,
R16U = 28,
R16S = 29,
R16UI = 30,
R16I = 31,
RG16 = 32,
RG16F = 33,
RG16UI = 34,
RG16I = 35,
RG16S = 36,
RGB32F = 37,
SRGBA8 = 38,
RG8U = 39,
RG8S = 40,
RG32UI = 41,
R32UI = 42,
BC6H_UF16 = 20,
BC6H_SF16 = 21,
ASTC_2D_4X4 = 22,
G8R8U = 23,
G8R8S = 24,
BGRA8 = 25,
RGBA32F = 26,
RG32F = 27,
R32F = 28,
R16F = 29,
R16U = 30,
R16S = 31,
R16UI = 32,
R16I = 33,
RG16 = 34,
RG16F = 35,
RG16UI = 36,
RG16I = 37,
RG16S = 38,
RGB32F = 39,
SRGBA8 = 40,
RG8U = 41,
RG8S = 42,
RG32UI = 43,
R32UI = 44,
MaxColorFormat,
// Depth formats
Z32F = 43,
Z16 = 44,
Z32F = 45,
Z16 = 46,
MaxDepthFormat,
// DepthStencil formats
Z24S8 = 45,
S8Z24 = 46,
Z32FS8 = 47,
Z24S8 = 47,
S8Z24 = 48,
Z32FS8 = 49,
MaxDepthStencilFormat,
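The two new BC6H entries would presumably map to the BPTC half-float formats from ARB_texture_compression_bptc on the OpenGL side; the actual format-tuple table is not part of this diff, so the helper below is only an illustrative sketch (it assumes glad exposes the BPTC enums):

#include <glad/glad.h>

// Hypothetical helper, not yuzu's real format table.
static GLenum GetBc6hInternalFormat(bool is_signed) {
    return is_signed ? GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT    // BC6H_SF16
                     : GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT; // BC6H_UF16
}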
@@ -138,6 +140,8 @@ struct SurfaceParams {
4, // DXN2UNORM
4, // DXN2SNORM
4, // BC7U
4, // BC6H_UF16
4, // BC6H_SF16
4, // ASTC_2D_4X4
1, // G8R8U
1, // G8R8S
@@ -197,6 +201,8 @@ struct SurfaceParams {
128, // DXN2UNORM
128, // DXN2SNORM
128, // BC7U
128, // BC6H_UF16
128, // BC6H_SF16
32, // ASTC_2D_4X4
16, // G8R8U
16, // G8R8S
@@ -482,6 +488,10 @@ struct SurfaceParams {
UNREACHABLE();
case Tegra::Texture::TextureFormat::BC7U:
return PixelFormat::BC7U;
case Tegra::Texture::TextureFormat::BC6H_UF16:
return PixelFormat::BC6H_UF16;
case Tegra::Texture::TextureFormat::BC6H_SF16:
return PixelFormat::BC6H_SF16;
case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
return PixelFormat::ASTC_2D_4X4;
case Tegra::Texture::TextureFormat::R16_G16:
@@ -628,9 +638,6 @@ struct SurfaceParams {
GetFormatBpp(pixel_format) / CHAR_BIT;
}
/// Returns the CPU virtual address for this surface
VAddr GetCpuAddr() const;
/// Creates SurfaceParams from a texture configuration
static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
@@ -643,25 +650,13 @@ struct SurfaceParams {
Tegra::GPUVAddr zeta_address,
Tegra::DepthFormat format);
bool operator==(const SurfaceParams& other) const {
return std::tie(addr, is_tiled, block_height, pixel_format, component_type, type, width,
height, unaligned_height, size_in_bytes) ==
std::tie(other.addr, other.is_tiled, other.block_height, other.pixel_format,
other.component_type, other.type, other.width, other.height,
other.unaligned_height, other.size_in_bytes);
}
bool operator!=(const SurfaceParams& other) const {
return !operator==(other);
}
/// Checks if surfaces are compatible for caching
bool IsCompatibleSurface(const SurfaceParams& other) const {
return std::tie(pixel_format, type, cache_width, cache_height) ==
std::tie(other.pixel_format, other.type, other.cache_width, other.cache_height);
}
Tegra::GPUVAddr addr;
VAddr addr;
bool is_tiled;
u32 block_height;
PixelFormat pixel_format;
@@ -702,7 +697,7 @@ class CachedSurface final {
public:
CachedSurface(const SurfaceParams& params);
Tegra::GPUVAddr GetAddr() const {
VAddr GetAddr() const {
return params.addr;
}
@@ -753,13 +748,16 @@ public:
/// Flushes the surface to Switch memory
void FlushSurface(const Surface& surface);
/// Tries to find a framebuffer GPU address based on the provided CPU address
Surface TryFindFramebufferSurface(VAddr cpu_addr) const;
/// Tries to find a framebuffer using the provided CPU address
Surface TryFindFramebufferSurface(VAddr addr) const;
private:
void LoadSurface(const Surface& surface);
Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true);
/// Gets an uncached surface, creating it if need be
Surface GetUncachedSurface(const SurfaceParams& params);
/// Recreates a surface with new parameters
Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params);


@@ -12,21 +12,17 @@
namespace OpenGL {
/// Gets the address for the specified shader stage program
static Tegra::GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)];
return gpu.regs.code_address.CodeAddress() + shader_config.offset;
return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
shader_config.offset);
}
/// Gets the shader program code from memory for the specified address
static GLShader::ProgramCode GetShaderCode(Tegra::GPUVAddr addr) {
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
static GLShader::ProgramCode GetShaderCode(VAddr addr) {
GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH);
const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(addr)};
Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64));
Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64));
return program_code;
}
@@ -55,7 +51,7 @@ static void SetShaderUniformBlockBindings(GLuint shader) {
sizeof(GLShader::MaxwellUniformData));
}
CachedShader::CachedShader(Tegra::GPUVAddr addr, Maxwell::ShaderProgram program_type)
CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
: addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} {
GLShader::ProgramResult program_result;
@@ -113,7 +109,7 @@ GLint CachedShader::GetUniformLocation(const std::string& name) {
}
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const Tegra::GPUVAddr program_addr{GetShaderAddress(program)};
const VAddr program_addr{GetShaderAddress(program)};
// Look up shader in the cache based on address
Shader shader{TryGet(program_addr)};


@@ -8,7 +8,6 @@
#include <unordered_map>
#include "common/common_types.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
@@ -21,16 +20,16 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
class CachedShader final {
public:
CachedShader(Tegra::GPUVAddr addr, Maxwell::ShaderProgram program_type);
CachedShader(VAddr addr, Maxwell::ShaderProgram program_type);
/// Gets the address of the shader in guest memory, required for cache management
Tegra::GPUVAddr GetAddr() const {
VAddr GetAddr() const {
return addr;
}
/// Gets the size of the shader in guest memory, required for cache management
size_t GetSizeInBytes() const {
return sizeof(GLShader::ProgramCode);
return GLShader::MAX_PROGRAM_CODE_LENGTH * sizeof(u64);
}
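The old return value was a bug: ProgramCode is a std::vector, so sizeof() yields the size of the vector object itself (typically 24 bytes on a 64-bit build), not the amount of shader data it covers. A tiny demonstration of the difference (assuming ProgramCode is std::vector<u64>; the MAX_PROGRAM_CODE_LENGTH value below is made up for illustration):

#include <cstdint>
#include <cstdio>
#include <vector>

using u64 = std::uint64_t;
using ProgramCode = std::vector<u64>;                   // assumed definition
constexpr std::size_t MAX_PROGRAM_CODE_LENGTH = 0x1000; // illustrative value only

int main() {
    std::printf("sizeof(ProgramCode) = %zu\n", sizeof(ProgramCode)); // vector header only
    std::printf("code bytes covered  = %zu\n", MAX_PROGRAM_CODE_LENGTH * sizeof(u64));
}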
/// Gets the shader entries for the shader
@@ -50,7 +49,7 @@ public:
GLint GetUniformLocation(const std::string& name);
private:
Tegra::GPUVAddr addr;
VAddr addr;
Maxwell::ShaderProgram program_type;
GLShader::ShaderSetup setup;
GLShader::ShaderEntries entries;


@@ -729,8 +729,7 @@ private:
{PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"},
{PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="},
{PredCondition::LessThanWithNan, "<"}, {PredCondition::NotEqualWithNan, "!="},
{PredCondition::GreaterThanWithNan, ">"},
};
{PredCondition::GreaterThanWithNan, ">"}, {PredCondition::GreaterEqualWithNan, ">="}};
const auto& comparison{PredicateComparisonStrings.find(condition)};
ASSERT_MSG(comparison != PredicateComparisonStrings.end(),
@@ -739,7 +738,8 @@ private:
std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'};
if (condition == PredCondition::LessThanWithNan ||
condition == PredCondition::NotEqualWithNan ||
condition == PredCondition::GreaterThanWithNan) {
condition == PredCondition::GreaterThanWithNan ||
condition == PredCondition::GreaterEqualWithNan) {
predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')';
}
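The WithNan variants OR the plain GLSL comparison with isnan() checks on both operands, so the predicate also passes when either input is NaN. A standalone sketch of the string being built (the register names are placeholders):

#include <iostream>
#include <string>

// Mirrors the concatenation above: the comparison first, then the NaN escape hatches.
static std::string MakeNanAwarePredicate(const std::string& op_a, const std::string& op_b,
                                         const std::string& comparison) {
    std::string predicate = '(' + op_a + ") " + comparison + " (" + op_b + ')';
    predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')';
    return predicate;
}

int main() {
    // GreaterEqualWithNan on two placeholder operands:
    std::cout << MakeNanAwarePredicate("r0", "r1", ">=") << '\n';
    // prints: (r0) >= (r1) || isnan(r0) || isnan(r1)
}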
@@ -887,6 +887,8 @@ private:
// TEXS has two destination registers and a swizzle. The first two elements in the swizzle
// go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
ASSERT_MSG(instr.texs.nodep == 0, "TEXS nodep not implemented");
size_t written_components = 0;
for (u32 component = 0; component < 4; ++component) {
if (!instr.texs.IsComponentEnabled(component)) {
@@ -1038,6 +1040,15 @@ private:
case OpCode::Id::FMUL_R:
case OpCode::Id::FMUL_IMM: {
// FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
ASSERT_MSG(instr.fmul.tab5cb8_2 == 0, "FMUL tab5cb8_2({}) is not implemented",
instr.fmul.tab5cb8_2.Value());
ASSERT_MSG(instr.fmul.tab5c68_1 == 0, "FMUL tab5c68_1({}) is not implemented",
instr.fmul.tab5c68_1.Value());
ASSERT_MSG(instr.fmul.tab5c68_0 == 1, "FMUL tab5c68_0({}) is not implemented",
instr.fmul.tab5c68_0.Value()); // SMO typically sends 1 here, which seems to be the default
ASSERT_MSG(instr.fmul.cc == 0, "FMUL cc is not implemented");
op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b);
regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1,
instr.alu.saturate_d);
@@ -1363,6 +1374,15 @@ private:
"((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
break;
}
case OpCode::Id::POPC_C:
case OpCode::Id::POPC_R:
case OpCode::Id::POPC_IMM: {
if (instr.popc.invert) {
op_b = "~(" + op_b + ')';
}
regs.SetRegisterToInteger(instr.gpr0, true, 0, "bitCount(" + op_b + ')', 1, 1);
break;
}
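POPC lowers to GLSL's bitCount(), with the invert bit complementing the operand first. The same semantics on the host side, as a quick sanity check (C++20, using std::popcount):

#include <bit>      // std::popcount (C++20)
#include <cstdint>
#include <cstdio>

// Population count of value, or of ~value when the invert bit is set.
static int Popc(std::uint32_t value, bool invert) {
    const std::uint32_t operand = invert ? ~value : value;
    return std::popcount(operand);
}

int main() {
    std::printf("%d\n", Popc(0x000000FFu, false)); // 8
    std::printf("%d\n", Popc(0x000000FFu, true));  // 24 (the remaining bits of ~0x000000FF)
}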
case OpCode::Id::SEL_C:
case OpCode::Id::SEL_R:
case OpCode::Id::SEL_IMM: {
@@ -1427,6 +1447,12 @@ private:
std::string op_b = instr.ffma.negate_b ? "-" : "";
std::string op_c = instr.ffma.negate_c ? "-" : "";
ASSERT_MSG(instr.ffma.cc == 0, "FFMA cc not implemented");
ASSERT_MSG(instr.ffma.tab5980_0 == 1, "FFMA tab5980_0({}) not implemented",
instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO
ASSERT_MSG(instr.ffma.tab5980_1 == 0, "FFMA tab5980_1({}) not implemented",
instr.ffma.tab5980_1.Value());
switch (opcode->GetId()) {
case OpCode::Id::FFMA_CR: {
op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
@@ -2100,7 +2126,43 @@ private:
}
case OpCode::Id::IPA: {
const auto& attribute = instr.attribute.fmt28;
regs.SetRegisterToInputAttibute(instr.gpr0, attribute.element, attribute.index);
const auto& reg = instr.gpr0;
ASSERT_MSG(instr.ipa.sample_mode == Tegra::Shader::IpaSampleMode::Default,
"Unhandled IPA sample mode: {}",
static_cast<u32>(instr.ipa.sample_mode.Value()));
ASSERT_MSG(instr.ipa.saturate == 0, "IPA saturate not implemented");
switch (instr.ipa.interp_mode) {
case Tegra::Shader::IpaInterpMode::Linear:
if (stage == Maxwell3D::Regs::ShaderStage::Fragment &&
attribute.index == Attribute::Index::Position) {
switch (attribute.element) {
case 0:
shader.AddLine(regs.GetRegisterAsFloat(reg) + " = gl_FragCoord.x;");
break;
case 1:
shader.AddLine(regs.GetRegisterAsFloat(reg) + " = gl_FragCoord.y;");
break;
case 2:
shader.AddLine(regs.GetRegisterAsFloat(reg) + " = gl_FragCoord.z;");
break;
case 3:
shader.AddLine(regs.GetRegisterAsFloat(reg) + " = 1.0;");
break;
}
} else {
regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index);
}
break;
case Tegra::Shader::IpaInterpMode::Perspective:
regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index);
break;
default:
LOG_CRITICAL(HW_GPU, "Unhandled IPA mode: {}",
static_cast<u32>(instr.ipa.interp_mode.Value()));
UNREACHABLE();
regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index);
}
break;
}
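For a fragment shader reading the Position attribute with linear interpolation, the IPA result is taken straight from gl_FragCoord, with element 3 hard-coded to 1.0. A compact sketch of that element mapping (the helper name is hypothetical):

#include <string>

// Element index of the Position attribute -> GLSL source expression, as in the switch above.
static std::string FragmentPositionSource(unsigned element) {
    switch (element) {
    case 0:
        return "gl_FragCoord.x";
    case 1:
        return "gl_FragCoord.y";
    case 2:
        return "gl_FragCoord.z";
    default:
        return "1.0"; // element 3 is written as a constant 1.0
    }
}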
case OpCode::Id::SSY: {


@@ -10,11 +10,14 @@
#include <glad/glad.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/telemetry.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/frontend/emu_window.h"
#include "core/memory.h"
#include "core/perf_stats.h"
#include "core/settings.h"
#include "core/telemetry_session.h"
#include "core/tracer/recorder.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
@@ -115,7 +118,7 @@ RendererOpenGL::~RendererOpenGL() = default;
void RendererOpenGL::SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) {
ScopeAcquireGLContext acquire_context{render_window};
Core::System::GetInstance().perf_stats.EndSystemFrame();
Core::System::GetInstance().GetPerfStats().EndSystemFrame();
// Maintain the rasterizer's state as a priority
OpenGLState prev_state = OpenGLState::GetCurState();
@@ -140,8 +143,8 @@ void RendererOpenGL::SwapBuffers(boost::optional<const Tegra::FramebufferConfig&
render_window.PollEvents();
Core::System::GetInstance().frame_limiter.DoFrameLimiting(CoreTiming::GetGlobalTimeUs());
Core::System::GetInstance().perf_stats.BeginSystemFrame();
Core::System::GetInstance().FrameLimiter().DoFrameLimiting(CoreTiming::GetGlobalTimeUs());
Core::System::GetInstance().GetPerfStats().BeginSystemFrame();
// Restore the rasterizer state
prev_state.Apply();
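SwapBuffers brackets every frame with EndSystemFrame/BeginSystemFrame and lets the frame limiter stall until the next deadline. A rough sketch of that kind of pacing (not yuzu's implementation; the fixed 60 FPS target and the wall-clock sleep are assumptions, whereas the real limiter is driven by the emulated time from CoreTiming::GetGlobalTimeUs()):

#include <chrono>
#include <thread>

// Sleep until the next frame deadline, assuming a fixed 60 FPS target.
class FrameLimiterSketch {
public:
    void DoFrameLimiting() {
        using namespace std::chrono;
        const auto frame_time =
            duration_cast<steady_clock::duration>(duration<double>(1.0 / 60.0));
        next_deadline += frame_time;
        std::this_thread::sleep_until(next_deadline);
    }

private:
    std::chrono::steady_clock::time_point next_deadline = std::chrono::steady_clock::now();
};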


@@ -56,6 +56,8 @@ u32 BytesPerPixel(TextureFormat format) {
case TextureFormat::DXT45:
case TextureFormat::DXN2:
case TextureFormat::BC7U:
case TextureFormat::BC6H_UF16:
case TextureFormat::BC6H_SF16:
// In this case a 'pixel' actually refers to a 4x4 tile.
return 16;
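Since a "pixel" for these block-compressed formats is really a 4x4 tile of 16 bytes, the byte size of such a texture follows directly from its tile count. A worked example (standalone sketch, not yuzu code):

#include <cstddef>
#include <cstdio>

// Size in bytes of a block-compressed texture with 16-byte 4x4 tiles
// (DXT45/DXN2/BC7U/BC6H in the cases above).
static std::size_t CompressedSizeInBytes(std::size_t width, std::size_t height) {
    const std::size_t blocks_x = (width + 3) / 4;  // round up to whole tiles
    const std::size_t blocks_y = (height + 3) / 4;
    return blocks_x * blocks_y * 16;
}

int main() {
    // A 256x256 BC6H texture: 64 * 64 tiles * 16 bytes = 65536 bytes.
    std::printf("%zu\n", CompressedSizeInBytes(256, 256));
}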
case TextureFormat::R32_G32_B32:
@@ -106,6 +108,8 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
case TextureFormat::DXN1:
case TextureFormat::DXN2:
case TextureFormat::BC7U:
case TextureFormat::BC6H_UF16:
case TextureFormat::BC6H_SF16:
case TextureFormat::ASTC_2D_4X4:
case TextureFormat::A8R8G8B8:
case TextureFormat::A2B10G10R10:


@@ -70,6 +70,9 @@ set(UIS
main.ui
)
file(GLOB COMPAT_LIST
${CMAKE_BINARY_DIR}/dist/compatibility_list/compatibility_list.qrc
${CMAKE_BINARY_DIR}/dist/compatibility_list/compatibility_list.json)
file(GLOB_RECURSE ICONS ${CMAKE_SOURCE_DIR}/dist/icons/*)
file(GLOB_RECURSE THEMES ${CMAKE_SOURCE_DIR}/dist/qt_themes/*)
@@ -77,6 +80,7 @@ qt5_wrap_ui(UI_HDRS ${UIS})
target_sources(yuzu
PRIVATE
${COMPAT_LIST}
${ICONS}
${THEMES}
${UI_HDRS}

Some files were not shown because too many files have changed in this diff.