Compare commits


47 Commits

Author SHA1 Message Date
Fernando Sahmkow
a8d4927e29 Corrections, documentation, and fixes. 2019-02-16 16:52:24 -04:00
Fernando Sahmkow
ecccfe0337 Use u128 in the Clock Cycles calculation. 2019-02-15 22:57:16 -04:00
Fernando Sahmkow
3ea48e8ebe Implement 128-bit Unsigned Integer Multiplication and Division. 2019-02-15 22:55:31 -04:00
Fernando Sahmkow
5b7ec71fb7 Correct CNTPCT to use Clock Cycles instead of CPU Cycles. 2019-02-15 22:55:29 -04:00
James Rowe
99da6362c4 Merge pull request #2123 from lioncash/coretiming-global
core_timing: De-globalize core_timing facilities
2019-02-15 19:52:11 -07:00
Lioncash
bd983414f6 core_timing: Convert core timing into a class
Gets rid of the largest set of mutable global state within the core.
This also paves the way for eliminating usages of GetInstance() on the
System class as a follow-up.

Note that no behavioral changes have been made, and this simply extracts
the functionality into a class. This also has the benefit of making
dependencies on the core timing functionality explicit within the
relevant interfaces.
2019-02-15 21:50:25 -05:00
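
For illustration, the shape of this change at a typical call site (the caller here is hypothetical; the pattern itself matches the diffs below):

// Before: free functions operating on mutable global state
const u64 ticks = CoreTiming::GetTicks();

// After: an instance owned by Core::System, reached through an accessor
// or, preferably, through an explicitly passed reference
const u64 ticks_after = Core::System::GetInstance().CoreTiming().GetTicks();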
bunnei
4327f430f1 Merge pull request #2112 from lioncash/shadowing
gl_rasterizer_cache: Get rid of variable shadowing
2019-02-14 21:45:20 -05:00
bunnei
a8fc5d6edd Merge pull request #2111 from ReinUsesLisp/fetch-fix
gl_shader_decompiler: Re-implement TLDS lod
2019-02-14 21:42:34 -05:00
bunnei
fcc3aa0bbf Merge pull request #2113 from ReinUsesLisp/vulkan-base
vulkan: Add dependencies and device abstraction
2019-02-14 10:06:48 -05:00
bunnei
8490e7746a Merge pull request #2115 from lioncash/local
core_timing: Make EmptyTimedCallback a local variable
2019-02-13 21:42:04 -05:00
bunnei
f0c4ac9abd Merge pull request #2116 from lioncash/size
threadsafe_queue: Remove NeedSize template parameter
2019-02-13 21:41:25 -05:00
Lioncash
0829ef97ca threadsafe_queue: Use std::size_t for representing size
Makes it consistent with the regular standard containers in terms of
size representation. This also gets rid of dependence on our own
type aliases, removing the need for an include.
2019-02-12 22:39:53 -05:00
Lioncash
f0bfb24c61 threadsafe_queue: Remove NeedSize template parameter
The necessity of this parameter is dubious at best; in 2019, it probably
offers completely negligible savings compared to just leaving size
tracking enabled. This removes it and simplifies the overall interface.
2019-02-12 22:09:51 -05:00
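
A minimal sketch of the interface change (the int queue is illustrative):

// Before: size tracking was opt-in via a template parameter, and calling
// Size() on a queue declared with NeedSize = false tripped a static_assert.
Common::SPSCQueue<int, true> old_queue;

// After: every queue tracks its size, and Size() returns std::size_t.
Common::SPSCQueue<int> queue;
queue.Push(42);
const std::size_t count = queue.Size(); // == 1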
Lioncash
83ba3515ec core_timing: Make EmptyTimedCallback a local variable
Given this is only used in one place, it can be moved closer to its
usage site.
2019-02-12 21:47:18 -05:00
bunnei
cd542d5aac Merge pull request #2099 from greggameplayer/BGRA8-Framebuffer-Real
Implement BGRA8 framebuffer format
2019-02-12 21:44:20 -05:00
bunnei
c425a1a857 Merge pull request #2114 from lioncash/global
renderer_opengl: Remove reference to global system instance
2019-02-12 21:40:31 -05:00
ReinUsesLisp
8beca060d1 vk_device: Abstract device handling into a class
VKDevice contains all the data required to manage and initialize a
physical device. It is intended to be passed to Vulkan objects so they
can query device-specific data (for example, the logical device and the
dispatch loader).
2019-02-12 21:43:02 -03:00
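
As a sketch of that pattern (the fence class and accessor names here are assumptions, not taken from this diff):

// A Vulkan object takes the device abstraction and queries
// device-specific state from it.
class VKFence {
public:
    explicit VKFence(const VKDevice& device) : device{device} {}
    // e.g. device.GetLogical() for the vk::Device,
    //      device.GetDispatchLoader() for the dynamic dispatcher
private:
    const VKDevice& device;
};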
Lioncash
86b55cb6df renderer_opengl: Remove reference to global system instance
We already store a reference to the system instance that the renderer is
created with, so we don't need to refer to the system instance via
Core::System::GetInstance().
2019-02-12 19:33:22 -05:00
bunnei
8135f4bfce Merge pull request #2110 from lioncash/namespace
core_timing: Rename CoreTiming namespace to Core::Timing
2019-02-12 19:26:37 -05:00
bunnei
c440ecfafe Merge pull request #2104 from ReinUsesLisp/compute-assert
kepler_compute: Fixup assert and rename the engine
2019-02-12 19:24:34 -05:00
Lioncash
054e39647c gl_rasterizer_cache: Remove unnecessary newline 2019-02-12 16:56:19 -05:00
Lioncash
e25c464c02 gl_rasterizer_cache: Get rid of variable shadowing
Avoids shadowing the members of the struct itself, which results in a
-Wshadow warning.
2019-02-12 16:46:39 -05:00
ReinUsesLisp
18fe910957 renderer_vulkan: Add declarations file
This file is intended to be included instead of vulkan/vulkan.hpp. It
includes declarations of unique handles using a dynamic dispatcher
instead of a static one (which would require linking to a Vulkan
library).
2019-02-12 18:33:02 -03:00
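
For context, the trade-off being described, sketched against Vulkan-Hpp (the exact init call shape is an assumption; check the Vulkan-Hpp version in use):

// A static dispatcher resolves vkCreateDevice and friends at link time,
// which requires linking against a Vulkan loader library. A dynamic
// dispatcher loads the entry points at runtime instead:
//
//   vk::DispatchLoaderDynamic dld;
//   dld.init(instance, vkGetInstanceProcAddr);
//
// Unique handles are then declared against the dynamic dispatcher, e.g.
//   vk::UniqueHandle<vk::Fence, vk::DispatchLoaderDynamic>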
ReinUsesLisp
b12ab4d805 logging: Add Vulkan backend logging class type 2019-02-12 18:33:02 -03:00
ReinUsesLisp
cc94a6d101 cmake: Add Vulkan option 2019-02-12 18:33:02 -03:00
ReinUsesLisp
afb8af9853 gitmodules: Add Vulkan headers dependency 2019-02-12 18:33:02 -03:00
ReinUsesLisp
e60d4d70bc gl_shader_decompiler: Re-implement TLDS lod 2019-02-12 17:03:07 -03:00
Lioncash
48d9d66dc5 core_timing: Rename CoreTiming namespace to Core::Timing
Places all of the timing-related functionality under the existing Core
namespace to keep things consistent, rather than having the timing
utilities sitting in their own completely separate namespace.
2019-02-12 12:42:17 -05:00
bunnei
444231a83d Merge pull request #2108 from FernandoS27/fix-cc
Fix incorrect value for CC bit in IADD
2019-02-12 10:39:03 -05:00
bunnei
c1accfefde Merge pull request #2109 from FernandoS27/fix-f2i
Corrected F2I None mode to RoundEven.
2019-02-12 10:20:29 -05:00
bunnei
27e5efd265 Merge pull request #2068 from ReinUsesLisp/shader-cleanup-textures
shader_ir: Clean texture management code
2019-02-12 10:20:15 -05:00
Fernando Sahmkow
f5ec165e8c Corrected F2I None mode to RoundEven. 2019-02-11 18:46:45 -04:00
Fernando Sahmkow
edd668047c Fix incorrect value for CC bit in IADD 2019-02-11 16:44:43 -04:00
ReinUsesLisp
1ddcd0e6f0 kepler_compute: Fixup assert and rename engines
When I originally added the compute assert I used the wrong
documentation. This addresses that.

The dispatch register was tested with homebrew against hardware and is
triggered by some games (e.g. Super Mario Odyssey). What exactly is
missing to get a valid program bound by this engine requires more
investigation.
2019-02-10 19:29:33 -03:00
greggameplayer
a6a73d8892 Implement BGRA8 framebuffer format 2019-02-09 23:44:01 +01:00
bunnei
1d98027a0e Merge pull request #1904 from bunnei/better-fermi-copy
gl_rasterizer: Implement a more accurate Fermi 2D copy.
2019-02-08 23:32:24 -05:00
bunnei
2374471a1e Merge pull request #2096 from FearlessTobi/patch-3
nvdisp_disp0: change drawing message log level from Warning to Trace
2019-02-08 21:56:47 -05:00
Fernando Sahmkow
e543320129 Implement linear textures (#2089) 2019-02-08 18:28:01 -05:00
bunnei
504aafedd2 Merge pull request #2097 from ReinUsesLisp/fixup-texview
gl_rasterizer_cache: Fixup texture view parameters
2019-02-08 17:30:36 -05:00
ReinUsesLisp
e36e7ae74e gl_rasterizer_cache: Fixup texture view parameters
These parameters were declared as constants and passed to glTextureView,
but then they were removed on a rebase. This addresses that mistake.
2019-02-08 18:32:58 -03:00
Tobias
259e52ccb2 nvdisp_disp0: change drawing message log level from Warning to Trace
This is a leftover from the early yuzu days. We shouldn't log every draw
by default, so let's change the log level to Trace.
2019-02-08 19:26:49 +01:00
ReinUsesLisp
889c646ac0 shader_ir: Remove F4 prefix to texture operations
This was originally included because texture operations returned a vec4.
These operations now return a single float, so the F4 prefix no longer
means anything.
2019-02-07 17:36:46 -03:00
ReinUsesLisp
d62b0a9e29 shader_ir: Clean texture management code
Previous code relied on GLSL parameter order (something that's always
ill-formed in an IR design). This approach passes spatial coordinates
through operation nodes, and array and depth-compare values in the
texture metadata. It still contains an "extra" vector with generic nodes
for bias and component index (for example), which is still a bit
ill-formed, but it should be better than the previous approach.
2019-02-07 00:46:13 -03:00
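
A sketch of the node layout being described (struct and field names are illustrative, not taken from the diff):

struct MetaTexture {
    const Sampler& sampler;
    Node array;               // array layer index, when present
    Node depth_compare;       // depth-compare reference value, when present
    std::vector<Node> extras; // the "extra" vector: bias, component index, ...
};
// Spatial coordinates travel as operands of the operation node itself;
// everything that is not a coordinate travels in the metadata.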
bunnei
f09d1dffd1 Merge pull request #2083 from ReinUsesLisp/shader-ir-cbuf-tracking
shader/track: Add a more permissive global memory tracking
2019-02-06 21:56:14 -05:00
ReinUsesLisp
0d1d755086 shader/track: Search inside of conditional nodes
Some games conditionally use global memory instructions. This allows
the heuristic to search inside conditional nodes for the source
constant buffer.
2019-02-03 17:21:20 -03:00
ReinUsesLisp
42b75e8be8 shader_ir: Rename BasicBlock to NodeBlock
It's not always used as a basic block. Rename it for consistency.
2019-02-03 17:21:20 -03:00
ReinUsesLisp
6a6fabea58 shader_ir: Pass decoded nodes as a whole instead of per basic blocks
Some games call LDG at the top of a basic block, causing the tracking
heuristic to fail. This commit lets the heuristic search the decoded
nodes as a whole instead of per basic block.

This may lead to some false positives, but it allows the heuristic to
track cases it previously couldn't.
2019-02-03 17:21:20 -03:00
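
Conceptually, the search now looks like this (hypothetical signature; the real heuristic lives in video_core's shader/track code):

// Walk backwards from the use site through the whole decoded node list,
// rather than stopping at the boundary of the current basic block.
Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);
// cursor starts at the LDG's position and moves toward the beginning of
// code; an LDG at the top of one block can now find a definition that
// lives in an earlier block.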
109 changed files with 1481 additions and 825 deletions

.gitmodules (vendored, +3)

@@ -37,3 +37,6 @@
[submodule "discord-rpc"]
path = externals/discord-rpc
url = https://github.com/discordapp/discord-rpc.git
[submodule "Vulkan-Headers"]
path = externals/Vulkan-Headers
url = https://github.com/KhronosGroup/Vulkan-Headers.git


@@ -23,6 +23,8 @@ option(YUZU_USE_QT_WEB_ENGINE "Use QtWebEngine for web applet implementation" OF
option(ENABLE_CUBEB "Enables the cubeb audio backend" ON)
option(ENABLE_VULKAN "Enables Vulkan backend" ON)
option(USE_DISCORD_PRESENCE "Enables Discord Rich Presence" OFF)
if(NOT EXISTS ${PROJECT_SOURCE_DIR}/.git/hooks/pre-commit)

externals/Vulkan-Headers (vendored submodule, +1)


@@ -26,14 +26,15 @@ static Stream::Format ChannelsToStreamFormat(u32 num_channels) {
return {};
}
StreamPtr AudioOut::OpenStream(u32 sample_rate, u32 num_channels, std::string&& name,
StreamPtr AudioOut::OpenStream(Core::Timing::CoreTiming& core_timing, u32 sample_rate,
u32 num_channels, std::string&& name,
Stream::ReleaseCallback&& release_callback) {
if (!sink) {
sink = CreateSinkFromID(Settings::values.sink_id, Settings::values.audio_device_id);
}
return std::make_shared<Stream>(
sample_rate, ChannelsToStreamFormat(num_channels), std::move(release_callback),
core_timing, sample_rate, ChannelsToStreamFormat(num_channels), std::move(release_callback),
sink->AcquireSinkStream(sample_rate, num_channels, name), std::move(name));
}


@@ -13,6 +13,10 @@
#include "audio_core/stream.h"
#include "common/common_types.h"
namespace Core::Timing {
class CoreTiming;
}
namespace AudioCore {
/**
@@ -21,8 +25,8 @@ namespace AudioCore {
class AudioOut {
public:
/// Opens a new audio stream
StreamPtr OpenStream(u32 sample_rate, u32 num_channels, std::string&& name,
Stream::ReleaseCallback&& release_callback);
StreamPtr OpenStream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, u32 num_channels,
std::string&& name, Stream::ReleaseCallback&& release_callback);
/// Returns a vector of recently released buffers specified by tag for the specified stream
std::vector<Buffer::Tag> GetTagsAndReleaseBuffers(StreamPtr stream, std::size_t max_count);


@@ -8,6 +8,7 @@
#include "audio_core/codec.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/kernel/writable_event.h"
#include "core/memory.h"
@@ -71,14 +72,14 @@ private:
EffectOutStatus out_status{};
EffectInStatus info{};
};
AudioRenderer::AudioRenderer(AudioRendererParameter params,
AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
Kernel::SharedPtr<Kernel::WritableEvent> buffer_event)
: worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count),
effects(params.effect_count) {
audio_out = std::make_unique<AudioCore::AudioOut>();
stream = audio_out->OpenStream(STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS, "AudioRenderer",
[=]() { buffer_event->Signal(); });
stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS,
"AudioRenderer", [=]() { buffer_event->Signal(); });
audio_out->StartStream(stream);
QueueMixedBuffer(0);


@@ -14,6 +14,10 @@
#include "common/swap.h"
#include "core/hle/kernel/object.h"
namespace Core::Timing {
class CoreTiming;
}
namespace Kernel {
class WritableEvent;
}
@@ -208,7 +212,7 @@ static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size
class AudioRenderer {
public:
AudioRenderer(AudioRendererParameter params,
AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
Kernel::SharedPtr<Kernel::WritableEvent> buffer_event);
~AudioRenderer();


@@ -32,12 +32,12 @@ u32 Stream::GetNumChannels() const {
return {};
}
Stream::Stream(u32 sample_rate, Format format, ReleaseCallback&& release_callback,
SinkStream& sink_stream, std::string&& name_)
Stream::Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format format,
ReleaseCallback&& release_callback, SinkStream& sink_stream, std::string&& name_)
: sample_rate{sample_rate}, format{format}, release_callback{std::move(release_callback)},
sink_stream{sink_stream}, name{std::move(name_)} {
sink_stream{sink_stream}, core_timing{core_timing}, name{std::move(name_)} {
release_event = CoreTiming::RegisterEvent(
release_event = core_timing.RegisterEvent(
name, [this](u64 userdata, int cycles_late) { ReleaseActiveBuffer(); });
}
@@ -57,7 +57,7 @@ Stream::State Stream::GetState() const {
s64 Stream::GetBufferReleaseCycles(const Buffer& buffer) const {
const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()};
return CoreTiming::usToCycles((static_cast<u64>(num_samples) * 1000000) / sample_rate);
return Core::Timing::usToCycles((static_cast<u64>(num_samples) * 1000000) / sample_rate);
}
static void VolumeAdjustSamples(std::vector<s16>& samples) {
@@ -99,7 +99,7 @@ void Stream::PlayNextBuffer() {
sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
CoreTiming::ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {});
core_timing.ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {});
}
void Stream::ReleaseActiveBuffer() {
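
A worked example of the release-cycle math in GetBufferReleaseCycles above (sample values are illustrative):

// A buffer of 960 interleaved samples with 2 channels at 48000 Hz:
//   num_samples = 960 / 2 = 480
//   (480 * 1000000) / 48000 = 10000 microseconds
// usToCycles(10000) then converts that to emulated CPU cycles at
// BASE_CLOCK_RATE, i.e. roughly 10.19 million cycles.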


@@ -13,9 +13,10 @@
#include "audio_core/buffer.h"
#include "common/common_types.h"
namespace CoreTiming {
namespace Core::Timing {
class CoreTiming;
struct EventType;
}
} // namespace Core::Timing
namespace AudioCore {
@@ -42,8 +43,8 @@ public:
/// Callback function type, used to change guest state on a buffer being released
using ReleaseCallback = std::function<void()>;
Stream(u32 sample_rate, Format format, ReleaseCallback&& release_callback,
SinkStream& sink_stream, std::string&& name_);
Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format format,
ReleaseCallback&& release_callback, SinkStream& sink_stream, std::string&& name_);
/// Plays the audio stream
void Play();
@@ -91,16 +92,17 @@ private:
/// Gets the number of core cycles when the specified buffer will be released
s64 GetBufferReleaseCycles(const Buffer& buffer) const;
u32 sample_rate; ///< Sample rate of the stream
Format format; ///< Format of the stream
ReleaseCallback release_callback; ///< Buffer release callback for the stream
State state{State::Stopped}; ///< Playback state of the stream
CoreTiming::EventType* release_event{}; ///< Core timing release event for the stream
BufferPtr active_buffer; ///< Actively playing buffer in the stream
std::queue<BufferPtr> queued_buffers; ///< Buffers queued to be played in the stream
std::queue<BufferPtr> released_buffers; ///< Buffers recently released from the stream
SinkStream& sink_stream; ///< Output sink for the stream
std::string name; ///< Name of the stream, must be unique
u32 sample_rate; ///< Sample rate of the stream
Format format; ///< Format of the stream
ReleaseCallback release_callback; ///< Buffer release callback for the stream
State state{State::Stopped}; ///< Playback state of the stream
Core::Timing::EventType* release_event{}; ///< Core timing release event for the stream
BufferPtr active_buffer; ///< Actively playing buffer in the stream
std::queue<BufferPtr> queued_buffers; ///< Buffers queued to be played in the stream
std::queue<BufferPtr> released_buffers; ///< Buffers recently released from the stream
SinkStream& sink_stream; ///< Output sink for the stream
Core::Timing::CoreTiming& core_timing; ///< Core timing instance.
std::string name; ///< Name of the stream, must be unique
};
using StreamPtr = std::shared_ptr<Stream>;


@@ -113,6 +113,8 @@ add_library(common STATIC
threadsafe_queue.h
timer.cpp
timer.h
uint128.cpp
uint128.h
vector_math.h
web_result.h
)


@@ -232,6 +232,7 @@ void DebuggerBackend::Write(const Entry& entry) {
CLS(Render) \
SUB(Render, Software) \
SUB(Render, OpenGL) \
SUB(Render, Vulkan) \
CLS(Audio) \
SUB(Audio, DSP) \
SUB(Audio, Sink) \


@@ -112,6 +112,7 @@ enum class Class : ClassType {
Render, ///< Emulator video output and hardware acceleration
Render_Software, ///< Software renderer backend
Render_OpenGL, ///< OpenGL backend
Render_Vulkan, ///< Vulkan backend
Audio, ///< Audio emulation
Audio_DSP, ///< The HLE implementation of the DSP
Audio_Sink, ///< Emulator audio output backend


@@ -7,17 +7,16 @@
// a simple lockless thread-safe,
// single reader, single writer queue
#include <algorithm>
#include <atomic>
#include <cstddef>
#include <mutex>
#include "common/common_types.h"
#include <utility>
namespace Common {
template <typename T, bool NeedSize = true>
template <typename T>
class SPSCQueue {
public:
SPSCQueue() : size(0) {
SPSCQueue() {
write_ptr = read_ptr = new ElementPtr();
}
~SPSCQueue() {
@@ -25,13 +24,12 @@ public:
delete read_ptr;
}
u32 Size() const {
static_assert(NeedSize, "using Size() on FifoQueue without NeedSize");
std::size_t Size() const {
return size.load();
}
bool Empty() const {
return !read_ptr->next.load();
return Size() == 0;
}
T& Front() const {
@@ -47,13 +45,13 @@ public:
ElementPtr* new_ptr = new ElementPtr();
write_ptr->next.store(new_ptr, std::memory_order_release);
write_ptr = new_ptr;
if (NeedSize)
size++;
++size;
}
void Pop() {
if (NeedSize)
size--;
--size;
ElementPtr* tmpptr = read_ptr;
// advance the read pointer
read_ptr = tmpptr->next.load();
@@ -66,8 +64,7 @@ public:
if (Empty())
return false;
if (NeedSize)
size--;
--size;
ElementPtr* tmpptr = read_ptr;
read_ptr = tmpptr->next.load(std::memory_order_acquire);
@@ -89,7 +86,7 @@ private:
// and a pointer to the next ElementPtr
class ElementPtr {
public:
ElementPtr() : next(nullptr) {}
ElementPtr() {}
~ElementPtr() {
ElementPtr* next_ptr = next.load();
@@ -98,21 +95,21 @@ private:
}
T current;
std::atomic<ElementPtr*> next;
std::atomic<ElementPtr*> next{nullptr};
};
ElementPtr* write_ptr;
ElementPtr* read_ptr;
std::atomic<u32> size;
std::atomic_size_t size{0};
};
// a simple thread-safe,
// single reader, multiple writer queue
template <typename T, bool NeedSize = true>
template <typename T>
class MPSCQueue {
public:
u32 Size() const {
std::size_t Size() const {
return spsc_queue.Size();
}
@@ -144,7 +141,7 @@ public:
}
private:
SPSCQueue<T, NeedSize> spsc_queue;
SPSCQueue<T> spsc_queue;
std::mutex write_lock;
};
} // namespace Common

src/common/uint128.cpp (new file, +41)

@@ -0,0 +1,41 @@
#ifdef _MSC_VER
#include <intrin.h>
#pragma intrinsic(_umul128)
#endif
#include <cstring>
#include "common/uint128.h"
namespace Common {
u128 Multiply64Into128(u64 a, u64 b) {
u128 result;
#ifdef _MSC_VER
result[0] = _umul128(a, b, &result[1]);
#else
unsigned __int128 tmp = a;
tmp *= b;
std::memcpy(&result, &tmp, sizeof(u128));
#endif
return result;
}
std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
u64 remainder = dividend[0] % divisor;
u64 accum = dividend[0] / divisor;
if (dividend[1] == 0)
return {accum, remainder};
// We ignore dividend[1] / divisor as that overflows
const u64 first_segment = (dividend[1] % divisor) << 32;
accum += (first_segment / divisor) << 32;
const u64 second_segment = (first_segment % divisor) << 32;
accum += (second_segment / divisor);
remainder += second_segment % divisor;
if (remainder >= divisor) {
accum++;
remainder -= divisor;
}
return {accum, remainder};
}
} // namespace Common
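
A usage sketch of the new helpers (values chosen for illustration):

#include "common/uint128.h"

// Compute (a * b) / c without losing the high 64 bits of the product,
// which is exactly the shape of the CpuCyclesToClockCycles change below.
const u64 a = 1019215872; // BASE_CLOCK_RATE
const u64 b = 19200000;   // CNTFREQ
const u128 product = Common::Multiply64Into128(a, b);
const auto [quotient, remainder] = Common::Divide128On32(product, 1000000000);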

src/common/uint128.h (new file, +14)

@@ -0,0 +1,14 @@
#include <utility>
#include "common/common_types.h"
namespace Common {
// This function multiplies 2 u64 values and produces a u128 value.
u128 Multiply64Into128(u64 a, u64 b);
// This function divides a u128 by a u32 value and produces two u64 values:
// the result of division and the remainder
std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor);
} // namespace Common


@@ -12,6 +12,7 @@
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/svc.h"
@@ -112,14 +113,14 @@ public:
// Always execute at least one tick.
amortized_ticks = std::max<u64>(amortized_ticks, 1);
CoreTiming::AddTicks(amortized_ticks);
parent.core_timing.AddTicks(amortized_ticks);
num_interpreted_instructions = 0;
}
u64 GetTicksRemaining() override {
return std::max(CoreTiming::GetDowncount(), 0);
return std::max(parent.core_timing.GetDowncount(), 0);
}
u64 GetCNTPCT() override {
return CoreTiming::GetTicks();
return Timing::CpuCyclesToClockCycles(parent.core_timing.GetTicks());
}
ARM_Dynarmic& parent;
@@ -151,7 +152,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
config.tpidr_el0 = &cb->tpidr_el0;
config.dczid_el0 = 4;
config.ctr_el0 = 0x8444c004;
config.cntfrq_el0 = 19200000; // Value from fusee.
config.cntfrq_el0 = Timing::CNTFREQ;
// Unpredictable instructions
config.define_unpredictable_behaviour = true;
@@ -172,8 +173,10 @@ void ARM_Dynarmic::Step() {
cb->InterpreterFallback(jit->GetPC(), 1);
}
ARM_Dynarmic::ARM_Dynarmic(ExclusiveMonitor& exclusive_monitor, std::size_t core_index)
: cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), core_index{core_index},
ARM_Dynarmic::ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
std::size_t core_index)
: cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{core_timing},
core_index{core_index}, core_timing{core_timing},
exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {
ThreadContext ctx{};
inner_unicorn.SaveContext(ctx);


@@ -16,6 +16,10 @@ namespace Memory {
struct PageTable;
}
namespace Core::Timing {
class CoreTiming;
}
namespace Core {
class ARM_Dynarmic_Callbacks;
@@ -23,7 +27,8 @@ class DynarmicExclusiveMonitor;
class ARM_Dynarmic final : public ARM_Interface {
public:
ARM_Dynarmic(ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
std::size_t core_index);
~ARM_Dynarmic();
void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
@@ -62,6 +67,7 @@ private:
ARM_Unicorn inner_unicorn;
std::size_t core_index;
Timing::CoreTiming& core_timing;
DynarmicExclusiveMonitor& exclusive_monitor;
Memory::PageTable* current_page_table = nullptr;


@@ -72,7 +72,7 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
return {};
}
ARM_Unicorn::ARM_Unicorn() {
ARM_Unicorn::ARM_Unicorn(Timing::CoreTiming& core_timing) : core_timing{core_timing} {
CHECKED(uc_open(UC_ARCH_ARM64, UC_MODE_ARM, &uc));
auto fpv = 3 << 20;
@@ -177,7 +177,7 @@ void ARM_Unicorn::Run() {
if (GDBStub::IsServerEnabled()) {
ExecuteInstructions(std::max(4000000, 0));
} else {
ExecuteInstructions(std::max(CoreTiming::GetDowncount(), 0));
ExecuteInstructions(std::max(core_timing.GetDowncount(), 0));
}
}
@@ -190,7 +190,7 @@ MICROPROFILE_DEFINE(ARM_Jit_Unicorn, "ARM JIT", "Unicorn", MP_RGB(255, 64, 64));
void ARM_Unicorn::ExecuteInstructions(int num_instructions) {
MICROPROFILE_SCOPE(ARM_Jit_Unicorn);
CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
CoreTiming::AddTicks(num_instructions);
core_timing.AddTicks(num_instructions);
if (GDBStub::IsServerEnabled()) {
if (last_bkpt_hit) {
uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);


@@ -9,12 +9,17 @@
#include "core/arm/arm_interface.h"
#include "core/gdbstub/gdbstub.h"
namespace Core::Timing {
class CoreTiming;
}
namespace Core {
class ARM_Unicorn final : public ARM_Interface {
public:
ARM_Unicorn();
explicit ARM_Unicorn(Timing::CoreTiming& core_timing);
~ARM_Unicorn();
void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
Kernel::VMAPermission perms) override;
void UnmapMemory(VAddr address, std::size_t size) override;
@@ -43,6 +48,7 @@ public:
private:
uc_engine* uc{};
Timing::CoreTiming& core_timing;
GDBStub::BreakpointAddress last_bkpt{};
bool last_bkpt_hit;
};


@@ -94,8 +94,8 @@ struct System::Impl {
ResultStatus Init(System& system, Frontend::EmuWindow& emu_window) {
LOG_DEBUG(HW_Memory, "initialized OK");
CoreTiming::Init();
kernel.Initialize();
core_timing.Initialize();
kernel.Initialize(core_timing);
const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::system_clock::now().time_since_epoch());
@@ -120,7 +120,7 @@ struct System::Impl {
telemetry_session = std::make_unique<Core::TelemetrySession>();
service_manager = std::make_shared<Service::SM::ServiceManager>();
Service::Init(service_manager, *virtual_filesystem);
Service::Init(service_manager, system, *virtual_filesystem);
GDBStub::Init();
renderer = VideoCore::CreateRenderer(emu_window, system);
@@ -205,7 +205,7 @@ struct System::Impl {
// Shutdown kernel and core timing
kernel.Shutdown();
CoreTiming::Shutdown();
core_timing.Shutdown();
// Close app loader
app_loader.reset();
@@ -232,9 +232,10 @@ struct System::Impl {
}
PerfStatsResults GetAndResetPerfStats() {
return perf_stats.GetAndResetStats(CoreTiming::GetGlobalTimeUs());
return perf_stats.GetAndResetStats(core_timing.GetGlobalTimeUs());
}
Timing::CoreTiming core_timing;
Kernel::KernelCore kernel;
/// RealVfsFilesystem instance
FileSys::VirtualFilesystem virtual_filesystem;
@@ -396,6 +397,14 @@ const Kernel::KernelCore& System::Kernel() const {
return impl->kernel;
}
Timing::CoreTiming& System::CoreTiming() {
return impl->core_timing;
}
const Timing::CoreTiming& System::CoreTiming() const {
return impl->core_timing;
}
Core::PerfStats& System::GetPerfStats() {
return impl->perf_stats;
}


@@ -47,6 +47,10 @@ namespace VideoCore {
class RendererBase;
} // namespace VideoCore
namespace Core::Timing {
class CoreTiming;
}
namespace Core {
class ARM_Interface;
@@ -205,6 +209,12 @@ public:
/// Provides a constant pointer to the current process.
const Kernel::Process* CurrentProcess() const;
/// Provides a reference to the core timing instance.
Timing::CoreTiming& CoreTiming();
/// Provides a constant reference to the core timing instance.
const Timing::CoreTiming& CoreTiming() const;
/// Provides a reference to the kernel instance.
Kernel::KernelCore& Kernel();


@@ -49,17 +49,18 @@ bool CpuBarrier::Rendezvous() {
return false;
}
Cpu::Cpu(ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, std::size_t core_index)
: cpu_barrier{cpu_barrier}, core_index{core_index} {
Cpu::Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
CpuBarrier& cpu_barrier, std::size_t core_index)
: cpu_barrier{cpu_barrier}, core_timing{core_timing}, core_index{core_index} {
if (Settings::values.use_cpu_jit) {
#ifdef ARCHITECTURE_x86_64
arm_interface = std::make_unique<ARM_Dynarmic>(exclusive_monitor, core_index);
arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index);
#else
arm_interface = std::make_unique<ARM_Unicorn>();
LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
#endif
} else {
arm_interface = std::make_unique<ARM_Unicorn>();
arm_interface = std::make_unique<ARM_Unicorn>(core_timing);
}
scheduler = std::make_unique<Kernel::Scheduler>(*arm_interface);
@@ -93,14 +94,14 @@ void Cpu::RunLoop(bool tight_loop) {
if (IsMainCore()) {
// TODO(Subv): Only let CoreTiming idle if all 4 cores are idling.
CoreTiming::Idle();
CoreTiming::Advance();
core_timing.Idle();
core_timing.Advance();
}
PrepareReschedule();
} else {
if (IsMainCore()) {
CoreTiming::Advance();
core_timing.Advance();
}
if (tight_loop) {


@@ -15,6 +15,10 @@ namespace Kernel {
class Scheduler;
}
namespace Core::Timing {
class CoreTiming;
}
namespace Core {
class ARM_Interface;
@@ -41,7 +45,8 @@ private:
class Cpu {
public:
Cpu(ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, std::size_t core_index);
Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
CpuBarrier& cpu_barrier, std::size_t core_index);
~Cpu();
void RunLoop(bool tight_loop = true);
@@ -82,6 +87,7 @@ private:
std::unique_ptr<ARM_Interface> arm_interface;
CpuBarrier& cpu_barrier;
std::unique_ptr<Kernel::Scheduler> scheduler;
Timing::CoreTiming& core_timing;
std::atomic<bool> reschedule_pending = false;
std::size_t core_index;


@@ -8,71 +8,60 @@
#include <mutex>
#include <string>
#include <tuple>
#include <unordered_map>
#include <vector>
#include "common/assert.h"
#include "common/thread.h"
#include "common/threadsafe_queue.h"
#include "core/core_timing_util.h"
namespace CoreTiming {
namespace Core::Timing {
static s64 global_timer;
static int slice_length;
static int downcount;
constexpr int MAX_SLICE_LENGTH = 20000;
struct EventType {
TimedCallback callback;
const std::string* name;
};
struct Event {
struct CoreTiming::Event {
s64 time;
u64 fifo_order;
u64 userdata;
const EventType* type;
// Sort by time, unless the times are the same, in which case sort by
// the order added to the queue
friend bool operator>(const Event& left, const Event& right) {
return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
}
friend bool operator<(const Event& left, const Event& right) {
return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
}
};
// Sort by time, unless the times are the same, in which case sort by the order added to the queue
static bool operator>(const Event& left, const Event& right) {
return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
CoreTiming::CoreTiming() = default;
CoreTiming::~CoreTiming() = default;
void CoreTiming::Initialize() {
downcount = MAX_SLICE_LENGTH;
slice_length = MAX_SLICE_LENGTH;
global_timer = 0;
idled_cycles = 0;
// The time between CoreTiming being initialized and the first call to Advance() is considered
// the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before
// executing the first cycle of each slice to prepare the slice length and downcount for
// that slice.
is_global_timer_sane = true;
event_fifo_id = 0;
const auto empty_timed_callback = [](u64, s64) {};
ev_lost = RegisterEvent("_lost_event", empty_timed_callback);
}
static bool operator<(const Event& left, const Event& right) {
return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
void CoreTiming::Shutdown() {
MoveEvents();
ClearPendingEvents();
UnregisterAllEvents();
}
// unordered_map stores each element separately as a linked list node so pointers to elements
// remain stable regardless of rehashes/resizing.
static std::unordered_map<std::string, EventType> event_types;
// The queue is a min-heap using std::make_heap/push_heap/pop_heap.
// We don't use std::priority_queue because we need to be able to serialize, unserialize and
// erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't accommodated
// by the standard adaptor class.
static std::vector<Event> event_queue;
static u64 event_fifo_id;
// the queue for storing the events from other threads threadsafe until they will be added
// to the event_queue by the emu thread
static Common::MPSCQueue<Event, false> ts_queue;
// the queue for unscheduling the events from other threads threadsafe
static Common::MPSCQueue<std::pair<const EventType*, u64>, false> unschedule_queue;
constexpr int MAX_SLICE_LENGTH = 20000;
static s64 idled_cycles;
// Are we in a function that has been called from Advance()
// If events are scheduled from a function that gets called from Advance(),
// don't change slice_length and downcount.
static bool is_global_timer_sane;
static EventType* ev_lost = nullptr;
static void EmptyTimedCallback(u64 userdata, s64 cyclesLate) {}
EventType* RegisterEvent(const std::string& name, TimedCallback callback) {
EventType* CoreTiming::RegisterEvent(const std::string& name, TimedCallback callback) {
// check for existing type with same name.
// we want event type names to remain unique so that we can use them for serialization.
ASSERT_MSG(event_types.find(name) == event_types.end(),
@@ -86,71 +75,31 @@ EventType* RegisterEvent(const std::string& name, TimedCallback callback) {
return event_type;
}
void UnregisterAllEvents() {
void CoreTiming::UnregisterAllEvents() {
ASSERT_MSG(event_queue.empty(), "Cannot unregister events with events pending");
event_types.clear();
}
void Init() {
downcount = MAX_SLICE_LENGTH;
slice_length = MAX_SLICE_LENGTH;
global_timer = 0;
idled_cycles = 0;
// The time between CoreTiming being initialized and the first call to Advance() is considered
// the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before
// executing the first cycle of each slice to prepare the slice length and downcount for
// that slice.
is_global_timer_sane = true;
event_fifo_id = 0;
ev_lost = RegisterEvent("_lost_event", &EmptyTimedCallback);
}
void Shutdown() {
MoveEvents();
ClearPendingEvents();
UnregisterAllEvents();
}
// This should only be called from the CPU thread. If you are calling
// it from any other thread, you are doing something evil
u64 GetTicks() {
u64 ticks = static_cast<u64>(global_timer);
if (!is_global_timer_sane) {
ticks += slice_length - downcount;
}
return ticks;
}
void AddTicks(u64 ticks) {
downcount -= static_cast<int>(ticks);
}
u64 GetIdleTicks() {
return static_cast<u64>(idled_cycles);
}
void ClearPendingEvents() {
event_queue.clear();
}
void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
void CoreTiming::ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
ASSERT(event_type != nullptr);
s64 timeout = GetTicks() + cycles_into_future;
const s64 timeout = GetTicks() + cycles_into_future;
// If this event needs to be scheduled before the next advance(), force one early
if (!is_global_timer_sane)
if (!is_global_timer_sane) {
ForceExceptionCheck(cycles_into_future);
}
event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
}
void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
void CoreTiming::ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type,
u64 userdata) {
ts_queue.Push(Event{global_timer + cycles_into_future, 0, userdata, event_type});
}
void UnscheduleEvent(const EventType* event_type, u64 userdata) {
auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
void CoreTiming::UnscheduleEvent(const EventType* event_type, u64 userdata) {
const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
return e.type == event_type && e.userdata == userdata;
});
@@ -161,13 +110,33 @@ void UnscheduleEvent(const EventType* event_type, u64 userdata) {
}
}
void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata) {
void CoreTiming::UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata) {
unschedule_queue.Push(std::make_pair(event_type, userdata));
}
void RemoveEvent(const EventType* event_type) {
auto itr = std::remove_if(event_queue.begin(), event_queue.end(),
[&](const Event& e) { return e.type == event_type; });
u64 CoreTiming::GetTicks() const {
u64 ticks = static_cast<u64>(global_timer);
if (!is_global_timer_sane) {
ticks += slice_length - downcount;
}
return ticks;
}
u64 CoreTiming::GetIdleTicks() const {
return static_cast<u64>(idled_cycles);
}
void CoreTiming::AddTicks(u64 ticks) {
downcount -= static_cast<int>(ticks);
}
void CoreTiming::ClearPendingEvents() {
event_queue.clear();
}
void CoreTiming::RemoveEvent(const EventType* event_type) {
const auto itr = std::remove_if(event_queue.begin(), event_queue.end(),
[&](const Event& e) { return e.type == event_type; });
// Removing random items breaks the invariant so we have to re-establish it.
if (itr != event_queue.end()) {
@@ -176,22 +145,24 @@ void RemoveEvent(const EventType* event_type) {
}
}
void RemoveNormalAndThreadsafeEvent(const EventType* event_type) {
void CoreTiming::RemoveNormalAndThreadsafeEvent(const EventType* event_type) {
MoveEvents();
RemoveEvent(event_type);
}
void ForceExceptionCheck(s64 cycles) {
void CoreTiming::ForceExceptionCheck(s64 cycles) {
cycles = std::max<s64>(0, cycles);
if (downcount > cycles) {
// downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int
// here. Account for cycles already executed by adjusting the g.slice_length
slice_length -= downcount - static_cast<int>(cycles);
downcount = static_cast<int>(cycles);
if (downcount <= cycles) {
return;
}
// downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int
// here. Account for cycles already executed by adjusting the g.slice_length
slice_length -= downcount - static_cast<int>(cycles);
downcount = static_cast<int>(cycles);
}
void MoveEvents() {
void CoreTiming::MoveEvents() {
for (Event ev; ts_queue.Pop(ev);) {
ev.fifo_order = event_fifo_id++;
event_queue.emplace_back(std::move(ev));
@@ -199,13 +170,13 @@ void MoveEvents() {
}
}
void Advance() {
void CoreTiming::Advance() {
MoveEvents();
for (std::pair<const EventType*, u64> ev; unschedule_queue.Pop(ev);) {
UnscheduleEvent(ev.first, ev.second);
}
int cycles_executed = slice_length - downcount;
const int cycles_executed = slice_length - downcount;
global_timer += cycles_executed;
slice_length = MAX_SLICE_LENGTH;
@@ -229,17 +200,17 @@ void Advance() {
downcount = slice_length;
}
void Idle() {
void CoreTiming::Idle() {
idled_cycles += downcount;
downcount = 0;
}
std::chrono::microseconds GetGlobalTimeUs() {
std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE};
}
int GetDowncount() {
int CoreTiming::GetDowncount() const {
return downcount;
}
} // namespace CoreTiming
} // namespace Core::Timing


@@ -4,6 +4,27 @@
#pragma once
#include <chrono>
#include <functional>
#include <string>
#include <unordered_map>
#include <vector>
#include "common/common_types.h"
#include "common/threadsafe_queue.h"
namespace Core::Timing {
/// A callback that may be scheduled for a particular core timing event.
using TimedCallback = std::function<void(u64 userdata, int cycles_late)>;
/// Contains the characteristics of a particular event.
struct EventType {
/// The event's callback function.
TimedCallback callback;
/// A pointer to the name of the event.
const std::string* name;
};
/**
* This is a system to schedule events into the emulated machine's future. Time is measured
* in main CPU clock cycles.
@@ -16,80 +37,120 @@
* inside callback:
* ScheduleEvent(periodInCycles - cyclesLate, callback, "whatever")
*/
class CoreTiming {
public:
CoreTiming();
~CoreTiming();
#include <chrono>
#include <functional>
#include <string>
#include "common/common_types.h"
CoreTiming(const CoreTiming&) = delete;
CoreTiming(CoreTiming&&) = delete;
namespace CoreTiming {
CoreTiming& operator=(const CoreTiming&) = delete;
CoreTiming& operator=(CoreTiming&&) = delete;
struct EventType;
/// CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
/// required to end slice - 1 and start slice 0 before the first cycle of code is executed.
void Initialize();
using TimedCallback = std::function<void(u64 userdata, int cycles_late)>;
/// Tears down all timing related functionality.
void Shutdown();
/**
* CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
* required to end slice -1 and start slice 0 before the first cycle of code is executed.
*/
void Init();
void Shutdown();
/// Registers a core timing event with the given name and callback.
///
/// @param name The name of the core timing event to register.
/// @param callback The callback to execute for the event.
///
/// @returns An EventType instance representing the registered event.
///
/// @pre The name of the event being registered must be unique among all
/// registered events.
///
EventType* RegisterEvent(const std::string& name, TimedCallback callback);
/**
* This should only be called from the emu thread; if you are calling it from any other thread, you are
* doing something evil
*/
u64 GetTicks();
u64 GetIdleTicks();
void AddTicks(u64 ticks);
/// Unregisters all registered events thus far.
void UnregisterAllEvents();
/**
* Returns the event_type identifier. If name is not unique, it will assert.
*/
EventType* RegisterEvent(const std::string& name, TimedCallback callback);
void UnregisterAllEvents();
/// After the first Advance, the slice lengths and the downcount will be reduced whenever an
/// event is scheduled earlier than the current values.
///
/// Scheduling from a callback will not update the downcount until the Advance() completes.
void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata = 0);
/**
* After the first Advance, the slice lengths and the downcount will be reduced whenever an event
* is scheduled earlier than the current values.
* Scheduling from a callback will not update the downcount until the Advance() completes.
*/
void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata = 0);
/// This is to be called when outside of hle threads, such as the graphics thread, wants to
/// schedule things to be executed on the main thread.
///
/// @note This doesn't change slice_length and thus events scheduled by this might be
/// called with a delay of up to MAX_SLICE_LENGTH
void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type,
u64 userdata = 0);
/**
* This is to be called when outside of hle threads, such as the graphics thread, wants to
* schedule things to be executed on the main thread.
* Note that this doesn't change slice_length and thus events scheduled by this might be called
* with a delay of up to MAX_SLICE_LENGTH
*/
void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata);
void UnscheduleEvent(const EventType* event_type, u64 userdata);
void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata);
void UnscheduleEvent(const EventType* event_type, u64 userdata);
void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata);
/// We only permit one event of each type in the queue at a time.
void RemoveEvent(const EventType* event_type);
void RemoveNormalAndThreadsafeEvent(const EventType* event_type);
/// We only permit one event of each type in the queue at a time.
void RemoveEvent(const EventType* event_type);
void RemoveNormalAndThreadsafeEvent(const EventType* event_type);
void ForceExceptionCheck(s64 cycles);
/** Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends
* the previous timing slice and begins the next one, you must Advance from the previous
* slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an
* Advance() is required to initialize the slice length before the first cycle of emulated
* instructions is executed.
*/
void Advance();
void MoveEvents();
/// This should only be called from the emu thread; if you are calling it from any other
/// thread, you are doing something evil.
u64 GetTicks() const;
/// Pretend that the main CPU has executed enough cycles to reach the next event.
void Idle();
u64 GetIdleTicks() const;
/// Clear all pending events. This should ONLY be done on exit.
void ClearPendingEvents();
void AddTicks(u64 ticks);
void ForceExceptionCheck(s64 cycles);
/// Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends
/// the previous timing slice and begins the next one, you must Advance from the previous
/// slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an
/// Advance() is required to initialize the slice length before the first cycle of emulated
/// instructions is executed.
void Advance();
std::chrono::microseconds GetGlobalTimeUs();
/// Pretend that the main CPU has executed enough cycles to reach the next event.
void Idle();
int GetDowncount();
std::chrono::microseconds GetGlobalTimeUs() const;
} // namespace CoreTiming
int GetDowncount() const;
private:
struct Event;
/// Clear all pending events. This should ONLY be done on exit.
void ClearPendingEvents();
void MoveEvents();
s64 global_timer = 0;
s64 idled_cycles = 0;
int slice_length = 0;
int downcount = 0;
// Are we in a function that has been called from Advance()
// If events are scheduled from a function that gets called from Advance(),
// don't change slice_length and downcount.
bool is_global_timer_sane = false;
// The queue is a min-heap using std::make_heap/push_heap/pop_heap.
// We don't use std::priority_queue because we need to be able to serialize, unserialize and
// erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't
// accommodated by the standard adaptor class.
std::vector<Event> event_queue;
u64 event_fifo_id = 0;
// Stores each element separately as a linked list node so pointers to elements
// remain stable regardless of rehashes/resizing.
std::unordered_map<std::string, EventType> event_types;
// The queue for storing the events from other threads threadsafe until they will be added
// to the event_queue by the emu thread
Common::MPSCQueue<Event> ts_queue;
// The queue for unscheduling the events from other threads threadsafe
Common::MPSCQueue<std::pair<const EventType*, u64>> unschedule_queue;
EventType* ev_lost = nullptr;
};
} // namespace Core::Timing
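
A minimal usage sketch of the class API above (the callback body is hypothetical):

Core::Timing::CoreTiming core_timing;
core_timing.Initialize();

Core::Timing::EventType* event = core_timing.RegisterEvent(
    "ExampleEvent", [](u64 userdata, int cycles_late) {
        // Runs once the emulated clock reaches the scheduled time.
    });

core_timing.ScheduleEvent(10000, event); // fire 10000 cycles from now
core_timing.Advance(); // ends the current slice; due events run here
core_timing.Shutdown();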


@@ -7,8 +7,9 @@
#include <cinttypes>
#include <limits>
#include "common/logging/log.h"
#include "common/uint128.h"
namespace CoreTiming {
namespace Core::Timing {
constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / BASE_CLOCK_RATE;
@@ -60,4 +61,9 @@ s64 nsToCycles(u64 ns) {
return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000;
}
} // namespace CoreTiming
u64 CpuCyclesToClockCycles(u64 ticks) {
const u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ);
return Common::Divide128On32(temporal, static_cast<u32>(BASE_CLOCK_RATE)).first;
}
} // namespace Core::Timing
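
A quick sanity check of the conversion (constants from core_timing_util.h below):

// One emulated second of CPU cycles maps onto one second of counter ticks:
//   CpuCyclesToClockCycles(BASE_CLOCK_RATE)
//     = (1019215872 * 19200000) / 1019215872 = 19200000 = CNTFREQ
// The u128 intermediate matters because ticks * CNTFREQ overflows a u64
// after about 2^64 / 19200000 ≈ 9.6e11 CPU cycles, i.e. roughly a
// quarter of an hour of emulated time.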


@@ -6,11 +6,12 @@
#include "common/common_types.h"
namespace CoreTiming {
namespace Core::Timing {
// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
// The exact value used is of course unverified.
constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked
constexpr u64 CNTFREQ = 19200000; // Value from fusee.
inline s64 msToCycles(int ms) {
// since ms is int there is no way to overflow
@@ -61,4 +62,6 @@ inline u64 cyclesToMs(s64 cycles) {
return cycles * 1000 / BASE_CLOCK_RATE;
}
} // namespace CoreTiming
u64 CpuCyclesToClockCycles(u64 ticks);
} // namespace Core::Timing


@@ -27,7 +27,8 @@ void CpuCoreManager::Initialize(System& system) {
exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size());
for (std::size_t index = 0; index < cores.size(); ++index) {
cores[index] = std::make_unique<Cpu>(*exclusive_monitor, *barrier, index);
cores[index] =
std::make_unique<Cpu>(system.CoreTiming(), *exclusive_monitor, *barrier, index);
}
// Create threads for CPU cores 1-3, and build thread_to_cpu map


@@ -86,11 +86,11 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
}
struct KernelCore::Impl {
void Initialize(KernelCore& kernel) {
void Initialize(KernelCore& kernel, Core::Timing::CoreTiming& core_timing) {
Shutdown();
InitializeSystemResourceLimit(kernel);
InitializeThreads();
InitializeThreads(core_timing);
}
void Shutdown() {
@@ -122,9 +122,9 @@ struct KernelCore::Impl {
ASSERT(system_resource_limit->SetLimitValue(ResourceType::Sessions, 900).IsSuccess());
}
void InitializeThreads() {
void InitializeThreads(Core::Timing::CoreTiming& core_timing) {
thread_wakeup_event_type =
CoreTiming::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
core_timing.RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
}
std::atomic<u32> next_object_id{0};
@@ -137,7 +137,7 @@ struct KernelCore::Impl {
SharedPtr<ResourceLimit> system_resource_limit;
CoreTiming::EventType* thread_wakeup_event_type = nullptr;
Core::Timing::EventType* thread_wakeup_event_type = nullptr;
// TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future,
// allowing us to simply use a pool index or similar.
Kernel::HandleTable thread_wakeup_callback_handle_table;
@@ -152,8 +152,8 @@ KernelCore::~KernelCore() {
Shutdown();
}
void KernelCore::Initialize() {
impl->Initialize(*this);
void KernelCore::Initialize(Core::Timing::CoreTiming& core_timing) {
impl->Initialize(*this, core_timing);
}
void KernelCore::Shutdown() {
@@ -213,7 +213,7 @@ u64 KernelCore::CreateNewProcessID() {
return impl->next_process_id++;
}
CoreTiming::EventType* KernelCore::ThreadWakeupCallbackEventType() const {
Core::Timing::EventType* KernelCore::ThreadWakeupCallbackEventType() const {
return impl->thread_wakeup_event_type;
}


@@ -11,9 +11,10 @@
template <typename T>
class ResultVal;
namespace CoreTiming {
namespace Core::Timing {
class CoreTiming;
struct EventType;
}
} // namespace Core::Timing
namespace Kernel {
@@ -39,7 +40,11 @@ public:
KernelCore& operator=(KernelCore&&) = delete;
/// Resets the kernel to a clean slate for use.
void Initialize();
///
/// @param core_timing CoreTiming instance used to create any necessary
/// kernel-specific callback events.
///
void Initialize(Core::Timing::CoreTiming& core_timing);
/// Clears all resources in use by the kernel instance.
void Shutdown();
@@ -89,7 +94,7 @@ private:
u64 CreateNewThreadID();
/// Retrieves the event type used for thread wakeup callbacks.
CoreTiming::EventType* ThreadWakeupCallbackEventType() const;
Core::Timing::EventType* ThreadWakeupCallbackEventType() const;
/// Provides a reference to the thread wakeup callback handle table.
Kernel::HandleTable& ThreadWakeupCallbackHandleTable();


@@ -111,7 +111,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
const u64 prev_switch_ticks = last_context_switch_time;
const u64 most_recent_switch_ticks = CoreTiming::GetTicks();
const u64 most_recent_switch_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
if (thread != nullptr) {


@@ -918,6 +918,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
}
const auto& system = Core::System::GetInstance();
const auto& core_timing = system.CoreTiming();
const auto& scheduler = system.CurrentScheduler();
const auto* const current_thread = scheduler.GetCurrentThread();
const bool same_thread = current_thread == thread;
@@ -927,9 +928,9 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) {
const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks();
out_ticks = thread_ticks + (CoreTiming::GetTicks() - prev_ctx_ticks);
out_ticks = thread_ticks + (core_timing.GetTicks() - prev_ctx_ticks);
} else if (same_thread && info_sub_id == system.CurrentCoreIndex()) {
out_ticks = CoreTiming::GetTicks() - prev_ctx_ticks;
out_ticks = core_timing.GetTicks() - prev_ctx_ticks;
}
*result = out_ticks;
@@ -1546,10 +1547,11 @@ static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to
static u64 GetSystemTick() {
LOG_TRACE(Kernel_SVC, "called");
const u64 result{CoreTiming::GetTicks()};
auto& core_timing = Core::System::GetInstance().CoreTiming();
const u64 result{core_timing.GetTicks()};
// Advance time to defeat dumb games that busy-wait for the frame to end.
CoreTiming::AddTicks(400);
core_timing.AddTicks(400);
return result;
}


@@ -43,7 +43,8 @@ Thread::~Thread() = default;
void Thread::Stop() {
// Cancel any outstanding wakeup events for this thread
CoreTiming::UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), callback_handle);
Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(),
callback_handle);
kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle);
callback_handle = 0;
@@ -85,12 +86,14 @@ void Thread::WakeAfterDelay(s64 nanoseconds) {
// This function might be called from any thread so we have to be cautious and use the
// thread-safe version of ScheduleEvent.
CoreTiming::ScheduleEventThreadsafe(CoreTiming::nsToCycles(nanoseconds),
kernel.ThreadWakeupCallbackEventType(), callback_handle);
Core::System::GetInstance().CoreTiming().ScheduleEventThreadsafe(
Core::Timing::nsToCycles(nanoseconds), kernel.ThreadWakeupCallbackEventType(),
callback_handle);
}
void Thread::CancelWakeupTimer() {
CoreTiming::UnscheduleEventThreadsafe(kernel.ThreadWakeupCallbackEventType(), callback_handle);
Core::System::GetInstance().CoreTiming().UnscheduleEventThreadsafe(
kernel.ThreadWakeupCallbackEventType(), callback_handle);
}
static std::optional<s32> GetNextProcessorId(u64 mask) {
@@ -189,6 +192,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
return ResultCode(-1);
}
auto& system = Core::System::GetInstance();
SharedPtr<Thread> thread(new Thread(kernel));
thread->thread_id = kernel.CreateNewThreadID();
@@ -197,7 +201,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
thread->stack_top = stack_top;
thread->tpidr_el0 = 0;
thread->nominal_priority = thread->current_priority = priority;
thread->last_running_ticks = CoreTiming::GetTicks();
thread->last_running_ticks = system.CoreTiming().GetTicks();
thread->processor_id = processor_id;
thread->ideal_core = processor_id;
thread->affinity_mask = 1ULL << processor_id;
@@ -208,7 +212,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
thread->name = std::move(name);
thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap();
thread->owner_process = &owner_process;
thread->scheduler = &Core::System::GetInstance().Scheduler(processor_id);
thread->scheduler = &system.Scheduler(processor_id);
thread->scheduler->AddThread(thread, priority);
thread->tls_address = thread->owner_process->MarkNextAvailableTLSSlotAsUsed(*thread);
@@ -257,7 +261,7 @@ void Thread::SetStatus(ThreadStatus new_status) {
}
if (status == ThreadStatus::Running) {
last_running_ticks = CoreTiming::GetTicks();
last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
}
status = new_status;


@@ -68,12 +68,12 @@ public:
RegisterHandlers(functions);
// This is the event handle used to check if the audio buffer was released
auto& kernel = Core::System::GetInstance().Kernel();
buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
"IAudioOutBufferReleased");
auto& system = Core::System::GetInstance();
buffer_event = Kernel::WritableEvent::CreateEventPair(
system.Kernel(), Kernel::ResetType::Sticky, "IAudioOutBufferReleased");
stream = audio_core.OpenStream(audio_params.sample_rate, audio_params.channel_count,
std::move(unique_name),
stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate,
audio_params.channel_count, std::move(unique_name),
[=]() { buffer_event.writable->Signal(); });
}


@@ -42,10 +42,11 @@ public:
// clang-format on
RegisterHandlers(functions);
auto& kernel = Core::System::GetInstance().Kernel();
system_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
"IAudioRenderer:SystemEvent");
renderer = std::make_unique<AudioCore::AudioRenderer>(audren_params, system_event.writable);
auto& system = Core::System::GetInstance();
system_event = Kernel::WritableEvent::CreateEventPair(
system.Kernel(), Kernel::ResetType::Sticky, "IAudioRenderer:SystemEvent");
renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params,
system_event.writable);
}
private:


@@ -7,6 +7,10 @@
#include "common/common_types.h"
#include "common/swap.h"
namespace Core::Timing {
class CoreTiming;
}
namespace Service::HID {
class ControllerBase {
public:
@@ -20,7 +24,8 @@ public:
virtual void OnRelease() = 0;
// When the controller is requesting an update for the shared memory
virtual void OnUpdate(u8* data, std::size_t size) = 0;
virtual void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) = 0;
// Called when input devices should be loaded
virtual void OnLoadInputDevices() = 0;
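Forward-declaring Core::Timing::CoreTiming keeps this header cheap to include; only the implementation files that actually read the clock need the full definition. Concrete controllers widen their override exactly as the DebugPad, Gesture and Keyboard changes below do, along these lines (sketch):
// Sketch: a concrete controller only consumes the timestamp, hence const&.
void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
              std::size_t size) override {
    shared_memory.header.timestamp = core_timing.GetTicks();
    // ... fill in the rest of the shared memory entry ...
}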

View File

@@ -21,8 +21,9 @@ void Controller_DebugPad::OnInit() {}
void Controller_DebugPad::OnRelease() {}
void Controller_DebugPad::OnUpdate(u8* data, std::size_t size) {
shared_memory.header.timestamp = CoreTiming::GetTicks();
void Controller_DebugPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) {
shared_memory.header.timestamp = core_timing.GetTicks();
shared_memory.header.total_entry_count = 17;
if (!IsControllerActivated()) {

View File

@@ -26,7 +26,7 @@ public:
void OnRelease() override;
// When the controller is requesting an update for the shared memory
void OnUpdate(u8* data, std::size_t size) override;
void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
// Called when input devices should be loaded
void OnLoadInputDevices() override;

View File

@@ -17,8 +17,9 @@ void Controller_Gesture::OnInit() {}
void Controller_Gesture::OnRelease() {}
void Controller_Gesture::OnUpdate(u8* data, std::size_t size) {
shared_memory.header.timestamp = CoreTiming::GetTicks();
void Controller_Gesture::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) {
shared_memory.header.timestamp = core_timing.GetTicks();
shared_memory.header.total_entry_count = 17;
if (!IsControllerActivated()) {

View File

@@ -22,7 +22,7 @@ public:
void OnRelease() override;
// When the controller is requesting an update for the shared memory
void OnUpdate(u8* data, size_t size) override;
void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, size_t size) override;
// Called when input devices should be loaded
void OnLoadInputDevices() override;

View File

@@ -19,8 +19,9 @@ void Controller_Keyboard::OnInit() {}
void Controller_Keyboard::OnRelease() {}
void Controller_Keyboard::OnUpdate(u8* data, std::size_t size) {
shared_memory.header.timestamp = CoreTiming::GetTicks();
void Controller_Keyboard::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) {
shared_memory.header.timestamp = core_timing.GetTicks();
shared_memory.header.total_entry_count = 17;
if (!IsControllerActivated()) {

View File

@@ -25,7 +25,7 @@ public:
void OnRelease() override;
// When the controller is requesting an update for the shared memory
void OnUpdate(u8* data, std::size_t size) override;
void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
// Called when input devices should be loaded
void OnLoadInputDevices() override;

View File

@@ -17,8 +17,9 @@ Controller_Mouse::~Controller_Mouse() = default;
void Controller_Mouse::OnInit() {}
void Controller_Mouse::OnRelease() {}
void Controller_Mouse::OnUpdate(u8* data, std::size_t size) {
shared_memory.header.timestamp = CoreTiming::GetTicks();
void Controller_Mouse::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) {
shared_memory.header.timestamp = core_timing.GetTicks();
shared_memory.header.total_entry_count = 17;
if (!IsControllerActivated()) {

View File

@@ -24,7 +24,7 @@ public:
void OnRelease() override;
// When the controller is requesting an update for the shared memory
void OnUpdate(u8* data, std::size_t size) override;
void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
// Called when input devices should be loaded
void OnLoadInputDevices() override;

View File

@@ -288,7 +288,8 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) {
rstick_entry.y = static_cast<s32>(stick_r_y_f * HID_JOYSTICK_MAX);
}
void Controller_NPad::OnUpdate(u8* data, std::size_t data_len) {
void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t data_len) {
if (!IsControllerActivated())
return;
for (std::size_t i = 0; i < shared_memory_entries.size(); i++) {
@@ -308,7 +309,7 @@ void Controller_NPad::OnUpdate(u8* data, std::size_t data_len) {
const auto& last_entry =
main_controller->npad[main_controller->common.last_entry_index];
main_controller->common.timestamp = CoreTiming::GetTicks();
main_controller->common.timestamp = core_timing.GetTicks();
main_controller->common.last_entry_index =
(main_controller->common.last_entry_index + 1) % 17;

View File

@@ -30,7 +30,7 @@ public:
void OnRelease() override;
// When the controller is requesting an update for the shared memory
void OnUpdate(u8* data, std::size_t size) override;
void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
// Called when input devices should be loaded
void OnLoadInputDevices() override;

View File

@@ -16,13 +16,14 @@ void Controller_Stubbed::OnInit() {}
void Controller_Stubbed::OnRelease() {}
void Controller_Stubbed::OnUpdate(u8* data, std::size_t size) {
void Controller_Stubbed::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) {
if (!smart_update) {
return;
}
CommonHeader header{};
header.timestamp = CoreTiming::GetTicks();
header.timestamp = core_timing.GetTicks();
header.total_entry_count = 17;
header.entry_count = 0;
header.last_entry_index = 0;

View File

@@ -20,7 +20,7 @@ public:
void OnRelease() override;
// When the controller is requesting an update for the shared memory
void OnUpdate(u8* data, std::size_t size) override;
void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
// Called when input devices should be loaded
void OnLoadInputDevices() override;

View File

@@ -20,8 +20,9 @@ void Controller_Touchscreen::OnInit() {}
void Controller_Touchscreen::OnRelease() {}
void Controller_Touchscreen::OnUpdate(u8* data, std::size_t size) {
shared_memory.header.timestamp = CoreTiming::GetTicks();
void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) {
shared_memory.header.timestamp = core_timing.GetTicks();
shared_memory.header.total_entry_count = 17;
if (!IsControllerActivated()) {
@@ -48,7 +49,7 @@ void Controller_Touchscreen::OnUpdate(u8* data, std::size_t size) {
touch_entry.diameter_x = Settings::values.touchscreen.diameter_x;
touch_entry.diameter_y = Settings::values.touchscreen.diameter_y;
touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle;
const u64 tick = CoreTiming::GetTicks();
const u64 tick = core_timing.GetTicks();
touch_entry.delta_time = tick - last_touch;
last_touch = tick;
touch_entry.finger = Settings::values.touchscreen.finger;

View File

@@ -24,7 +24,7 @@ public:
void OnRelease() override;
// When the controller is requesting an update for the shared memory
void OnUpdate(u8* data, std::size_t size) override;
void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
// Called when input devices should be loaded
void OnLoadInputDevices() override;

View File

@@ -17,9 +17,10 @@ void Controller_XPad::OnInit() {}
void Controller_XPad::OnRelease() {}
void Controller_XPad::OnUpdate(u8* data, std::size_t size) {
void Controller_XPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) {
for (auto& xpad_entry : shared_memory.shared_memory_entries) {
xpad_entry.header.timestamp = CoreTiming::GetTicks();
xpad_entry.header.timestamp = core_timing.GetTicks();
xpad_entry.header.total_entry_count = 17;
if (!IsControllerActivated()) {

View File

@@ -22,7 +22,7 @@ public:
void OnRelease() override;
// When the controller is requesting an update for the shared memory
void OnUpdate(u8* data, std::size_t size) override;
void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
// Called when input devices should be loaded
void OnLoadInputDevices() override;

View File

@@ -36,9 +36,9 @@ namespace Service::HID {
// Updating period for each HID device.
// TODO(ogniK): Find actual polling rate of hid
constexpr u64 pad_update_ticks = CoreTiming::BASE_CLOCK_RATE / 66;
constexpr u64 accelerometer_update_ticks = CoreTiming::BASE_CLOCK_RATE / 100;
constexpr u64 gyroscope_update_ticks = CoreTiming::BASE_CLOCK_RATE / 100;
constexpr u64 pad_update_ticks = Core::Timing::BASE_CLOCK_RATE / 66;
constexpr u64 accelerometer_update_ticks = Core::Timing::BASE_CLOCK_RATE / 100;
constexpr u64 gyroscope_update_ticks = Core::Timing::BASE_CLOCK_RATE / 100;
constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000;
IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") {
@@ -73,14 +73,15 @@ IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") {
GetController<Controller_Stubbed>(HidController::Unknown3).SetCommonHeaderOffset(0x5000);
// Register update callbacks
auto& core_timing = Core::System::GetInstance().CoreTiming();
pad_update_event =
CoreTiming::RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, int cycles_late) {
core_timing.RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, int cycles_late) {
UpdateControllers(userdata, cycles_late);
});
// TODO(shinyquagsire23): Other update callbacks? (accel, gyro?)
CoreTiming::ScheduleEvent(pad_update_ticks, pad_update_event);
core_timing.ScheduleEvent(pad_update_ticks, pad_update_event);
ReloadInputDevices();
}
@@ -94,7 +95,7 @@ void IAppletResource::DeactivateController(HidController controller) {
}
IAppletResource::~IAppletResource() {
CoreTiming::UnscheduleEvent(pad_update_event, 0);
Core::System::GetInstance().CoreTiming().UnscheduleEvent(pad_update_event, 0);
}
void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
@@ -106,15 +107,17 @@ void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
}
void IAppletResource::UpdateControllers(u64 userdata, int cycles_late) {
auto& core_timing = Core::System::GetInstance().CoreTiming();
const bool should_reload = Settings::values.is_device_reload_pending.exchange(false);
for (const auto& controller : controllers) {
if (should_reload) {
controller->OnLoadInputDevices();
}
controller->OnUpdate(shared_mem->GetPointer(), SHARED_MEMORY_SIZE);
controller->OnUpdate(core_timing, shared_mem->GetPointer(), SHARED_MEMORY_SIZE);
}
CoreTiming::ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event);
core_timing.ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event);
}
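Note how the callback re-arms itself with pad_update_ticks - cycles_late: subtracting the observed lateness keeps the average polling period fixed instead of letting each late dispatch push the next one further back. The idiom in isolation (a sketch with hypothetical names):
// Sketch: a drift-free periodic event on the class-based API.
void SchedulePeriodic(Core::Timing::CoreTiming& core_timing, Core::Timing::EventType* event,
                      u64 period_ticks, int cycles_late) {
    // cycles_late is how far past its deadline the callback actually ran;
    // re-arming with (period - lateness) keeps the long-run rate at one per period.
    core_timing.ScheduleEvent(static_cast<s64>(period_ticks) - cycles_late, event);
}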
class IActiveVibrationDeviceList final : public ServiceFramework<IActiveVibrationDeviceList> {

View File

@@ -7,7 +7,7 @@
#include "controllers/controller_base.h"
#include "core/hle/service/service.h"
namespace CoreTiming {
namespace Core::Timing {
struct EventType;
}
@@ -66,7 +66,7 @@ private:
Kernel::SharedPtr<Kernel::SharedMemory> shared_mem;
CoreTiming::EventType* pad_update_event;
Core::Timing::EventType* pad_update_event;
std::array<std::unique_ptr<ControllerBase>, static_cast<size_t>(HidController::MaxControllers)>
controllers{};

View File

@@ -98,7 +98,7 @@ void IRS::GetImageTransferProcessorState(Kernel::HLERequestContext& ctx) {
IPC::ResponseBuilder rb{ctx, 5};
rb.Push(RESULT_SUCCESS);
rb.PushRaw<u64>(CoreTiming::GetTicks());
rb.PushRaw<u64>(Core::System::GetInstance().CoreTiming().GetTicks());
rb.PushRaw<u32>(0);
}

View File

@@ -25,9 +25,9 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
const MathUtil::Rectangle<int>& crop_rect) {
VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
LOG_WARNING(Service,
"Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
addr, offset, width, height, stride, format);
LOG_TRACE(Service,
"Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
addr, offset, width, height, stride, format);
using PixelFormat = Tegra::FramebufferConfig::PixelFormat;
const Tegra::FramebufferConfig framebuffer{

View File

@@ -5,6 +5,7 @@
#include <cstring>
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h"
@@ -184,7 +185,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o
IoctlGetGpuTime params{};
std::memcpy(&params, input.data(), input.size());
params.gpu_time = CoreTiming::cyclesToNs(CoreTiming::GetTicks());
params.gpu_time = Core::Timing::cyclesToNs(Core::System::GetInstance().CoreTiming().GetTicks());
std::memcpy(output.data(), &params, output.size());
return 0;
}

View File

@@ -13,10 +13,6 @@
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/writable_event.h"
namespace CoreTiming {
struct EventType;
}
namespace Service::NVFlinger {
struct IGBPBuffer {

View File

@@ -25,21 +25,21 @@
namespace Service::NVFlinger {
constexpr std::size_t SCREEN_REFRESH_RATE = 60;
constexpr u64 frame_ticks = static_cast<u64>(CoreTiming::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
NVFlinger::NVFlinger() {
NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
// Schedule the screen composition events
composition_event =
CoreTiming::RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
Compose();
CoreTiming::ScheduleEvent(frame_ticks - cycles_late, composition_event);
this->core_timing.ScheduleEvent(frame_ticks - cycles_late, composition_event);
});
CoreTiming::ScheduleEvent(frame_ticks, composition_event);
core_timing.ScheduleEvent(frame_ticks, composition_event);
}
NVFlinger::~NVFlinger() {
CoreTiming::UnscheduleEvent(composition_event, 0);
core_timing.UnscheduleEvent(composition_event, 0);
}
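Storing the CoreTiming& handed to the constructor is what lets the destructor unschedule the composition event without reaching for a global: the object now owns its timing registration for its whole lifetime. The same shape in isolation (sketch):
// Sketch: tie an event's schedule/unschedule to an object's lifetime.
class Composer {
public:
    explicit Composer(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
        event = core_timing.RegisterEvent("Compose", [this](u64 userdata, int cycles_late) {
            // ... compose a frame, then re-arm with lateness compensation ...
            this->core_timing.ScheduleEvent(frame_ticks - cycles_late, event);
        });
        core_timing.ScheduleEvent(frame_ticks, event);
    }
    ~Composer() {
        core_timing.UnscheduleEvent(event, 0); // no dangling callback after destruction
    }

private:
    static constexpr u64 frame_ticks = Core::Timing::BASE_CLOCK_RATE / 60;
    Core::Timing::CoreTiming& core_timing;
    Core::Timing::EventType* event = nullptr;
};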
void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {

View File

@@ -14,9 +14,10 @@
#include "common/common_types.h"
#include "core/hle/kernel/object.h"
namespace CoreTiming {
namespace Core::Timing {
class CoreTiming;
struct EventType;
}
} // namespace Core::Timing
namespace Kernel {
class ReadableEvent;
@@ -52,7 +53,7 @@ struct Display {
class NVFlinger final {
public:
NVFlinger();
explicit NVFlinger(Core::Timing::CoreTiming& core_timing);
~NVFlinger();
/// Sets the NVDrv module instance to use to send buffers to the GPU.
@@ -115,8 +116,11 @@ private:
/// layers.
u32 next_buffer_queue_id = 1;
/// CoreTiming event that handles screen composition.
CoreTiming::EventType* composition_event;
/// Event that handles screen composition.
Core::Timing::EventType* composition_event;
/// Core timing instance for registering/unregistering the composition event.
Core::Timing::CoreTiming& core_timing;
};
} // namespace Service::NVFlinger

View File

@@ -194,10 +194,11 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co
// Module interface
/// Initialize ServiceManager
void Init(std::shared_ptr<SM::ServiceManager>& sm, FileSys::VfsFilesystem& vfs) {
void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system,
FileSys::VfsFilesystem& vfs) {
// NVFlinger needs to be accessed by several services like Vi and AppletOE so we instantiate it
// here and pass it into the respective InstallInterfaces functions.
auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>();
auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>(system.CoreTiming());
SM::ServiceManager::InstallInterfaces(sm);

View File

@@ -14,6 +14,14 @@
////////////////////////////////////////////////////////////////////////////////////////////////////
// Namespace Service
namespace Core {
class System;
}
namespace FileSys {
class VfsFilesystem;
}
namespace Kernel {
class ClientPort;
class ServerPort;
@@ -21,10 +29,6 @@ class ServerSession;
class HLERequestContext;
} // namespace Kernel
namespace FileSys {
class VfsFilesystem;
}
namespace Service {
namespace SM {
@@ -178,7 +182,8 @@ private:
};
/// Initialize ServiceManager
void Init(std::shared_ptr<SM::ServiceManager>& sm, FileSys::VfsFilesystem& vfs);
void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system,
FileSys::VfsFilesystem& vfs);
/// Shutdown ServiceManager
void Shutdown();

View File

@@ -5,6 +5,7 @@
#include <chrono>
#include <ctime>
#include "common/logging/log.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/hle/ipc_helpers.h"
@@ -106,8 +107,9 @@ private:
void GetCurrentTimePoint(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Time, "called");
SteadyClockTimePoint steady_clock_time_point{
CoreTiming::cyclesToMs(CoreTiming::GetTicks()) / 1000};
const auto& core_timing = Core::System::GetInstance().CoreTiming();
const SteadyClockTimePoint steady_clock_time_point{
Core::Timing::cyclesToMs(core_timing.GetTicks()) / 1000};
IPC::ResponseBuilder rb{ctx, (sizeof(SteadyClockTimePoint) / 4) + 2};
rb.Push(RESULT_SUCCESS);
rb.PushRaw(steady_clock_time_point);
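GetCurrentTimePoint derives its seconds value purely from emulated cycles: cyclesToMs scales the tick count by the base clock rate and the trailing / 1000 truncates to whole seconds. The shape of the conversion (a sketch, not the exact library code):
// Sketch of the conversion these call sites rely on.
constexpr u64 CyclesToMsSketch(u64 ticks) {
    return ticks * 1000 / Core::Timing::BASE_CLOCK_RATE; // wall-clock ms at the emulated rate
}
// e.g. ticks == BASE_CLOCK_RATE yields 1000 ms, i.e. a time point of one second.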
@@ -281,8 +283,9 @@ void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) {
return;
}
const auto& core_timing = Core::System::GetInstance().CoreTiming();
const SteadyClockTimePoint steady_clock_time_point{
CoreTiming::cyclesToMs(CoreTiming::GetTicks()) / 1000, {}};
Core::Timing::cyclesToMs(core_timing.GetTicks()) / 1000, {}};
CalendarTime calendar_time{};
calendar_time.year = tm->tm_year + 1900;

View File

@@ -28,100 +28,103 @@ void CallbackTemplate(u64 userdata, s64 cycles_late) {
REQUIRE(lateness == cycles_late);
}
class ScopeInit final {
public:
struct ScopeInit final {
ScopeInit() {
CoreTiming::Init();
core_timing.Initialize();
}
~ScopeInit() {
CoreTiming::Shutdown();
core_timing.Shutdown();
}
Core::Timing::CoreTiming core_timing;
};
static void AdvanceAndCheck(u32 idx, int downcount, int expected_lateness = 0,
int cpu_downcount = 0) {
static void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, int downcount,
int expected_lateness = 0, int cpu_downcount = 0) {
callbacks_ran_flags = 0;
expected_callback = CB_IDS[idx];
lateness = expected_lateness;
CoreTiming::AddTicks(CoreTiming::GetDowncount() -
cpu_downcount); // Pretend we executed X cycles of instructions.
CoreTiming::Advance();
// Pretend we executed X cycles of instructions.
core_timing.AddTicks(core_timing.GetDowncount() - cpu_downcount);
core_timing.Advance();
REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags);
REQUIRE(downcount == CoreTiming::GetDowncount());
REQUIRE(downcount == core_timing.GetDowncount());
}
TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
ScopeInit guard;
auto& core_timing = guard.core_timing;
CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);
CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>);
CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", CallbackTemplate<3>);
CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", CallbackTemplate<4>);
Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", CallbackTemplate<3>);
Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>);
// Enter slice 0
CoreTiming::Advance();
core_timing.Advance();
// D -> B -> C -> A -> E
CoreTiming::ScheduleEvent(1000, cb_a, CB_IDS[0]);
REQUIRE(1000 == CoreTiming::GetDowncount());
CoreTiming::ScheduleEvent(500, cb_b, CB_IDS[1]);
REQUIRE(500 == CoreTiming::GetDowncount());
CoreTiming::ScheduleEvent(800, cb_c, CB_IDS[2]);
REQUIRE(500 == CoreTiming::GetDowncount());
CoreTiming::ScheduleEvent(100, cb_d, CB_IDS[3]);
REQUIRE(100 == CoreTiming::GetDowncount());
CoreTiming::ScheduleEvent(1200, cb_e, CB_IDS[4]);
REQUIRE(100 == CoreTiming::GetDowncount());
core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]);
REQUIRE(1000 == core_timing.GetDowncount());
core_timing.ScheduleEvent(500, cb_b, CB_IDS[1]);
REQUIRE(500 == core_timing.GetDowncount());
core_timing.ScheduleEvent(800, cb_c, CB_IDS[2]);
REQUIRE(500 == core_timing.GetDowncount());
core_timing.ScheduleEvent(100, cb_d, CB_IDS[3]);
REQUIRE(100 == core_timing.GetDowncount());
core_timing.ScheduleEvent(1200, cb_e, CB_IDS[4]);
REQUIRE(100 == core_timing.GetDowncount());
AdvanceAndCheck(3, 400);
AdvanceAndCheck(1, 300);
AdvanceAndCheck(2, 200);
AdvanceAndCheck(0, 200);
AdvanceAndCheck(4, MAX_SLICE_LENGTH);
AdvanceAndCheck(core_timing, 3, 400);
AdvanceAndCheck(core_timing, 1, 300);
AdvanceAndCheck(core_timing, 2, 200);
AdvanceAndCheck(core_timing, 0, 200);
AdvanceAndCheck(core_timing, 4, MAX_SLICE_LENGTH);
}
TEST_CASE("CoreTiming[Threadsave]", "[core]") {
ScopeInit guard;
auto& core_timing = guard.core_timing;
CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);
CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>);
CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", CallbackTemplate<3>);
CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", CallbackTemplate<4>);
Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", CallbackTemplate<3>);
Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>);
// Enter slice 0
CoreTiming::Advance();
core_timing.Advance();
// D -> B -> C -> A -> E
CoreTiming::ScheduleEventThreadsafe(1000, cb_a, CB_IDS[0]);
core_timing.ScheduleEventThreadsafe(1000, cb_a, CB_IDS[0]);
// Manually force since ScheduleEventThreadsafe doesn't call it
CoreTiming::ForceExceptionCheck(1000);
REQUIRE(1000 == CoreTiming::GetDowncount());
CoreTiming::ScheduleEventThreadsafe(500, cb_b, CB_IDS[1]);
core_timing.ForceExceptionCheck(1000);
REQUIRE(1000 == core_timing.GetDowncount());
core_timing.ScheduleEventThreadsafe(500, cb_b, CB_IDS[1]);
// Manually force since ScheduleEventThreadsafe doesn't call it
CoreTiming::ForceExceptionCheck(500);
REQUIRE(500 == CoreTiming::GetDowncount());
CoreTiming::ScheduleEventThreadsafe(800, cb_c, CB_IDS[2]);
core_timing.ForceExceptionCheck(500);
REQUIRE(500 == core_timing.GetDowncount());
core_timing.ScheduleEventThreadsafe(800, cb_c, CB_IDS[2]);
// Manually force since ScheduleEventThreadsafe doesn't call it
CoreTiming::ForceExceptionCheck(800);
REQUIRE(500 == CoreTiming::GetDowncount());
CoreTiming::ScheduleEventThreadsafe(100, cb_d, CB_IDS[3]);
core_timing.ForceExceptionCheck(800);
REQUIRE(500 == core_timing.GetDowncount());
core_timing.ScheduleEventThreadsafe(100, cb_d, CB_IDS[3]);
// Manually force since ScheduleEventThreadsafe doesn't call it
CoreTiming::ForceExceptionCheck(100);
REQUIRE(100 == CoreTiming::GetDowncount());
CoreTiming::ScheduleEventThreadsafe(1200, cb_e, CB_IDS[4]);
core_timing.ForceExceptionCheck(100);
REQUIRE(100 == core_timing.GetDowncount());
core_timing.ScheduleEventThreadsafe(1200, cb_e, CB_IDS[4]);
// Manually force since ScheduleEventThreadsafe doesn't call it
CoreTiming::ForceExceptionCheck(1200);
REQUIRE(100 == CoreTiming::GetDowncount());
core_timing.ForceExceptionCheck(1200);
REQUIRE(100 == core_timing.GetDowncount());
AdvanceAndCheck(3, 400);
AdvanceAndCheck(1, 300);
AdvanceAndCheck(2, 200);
AdvanceAndCheck(0, 200);
AdvanceAndCheck(4, MAX_SLICE_LENGTH);
AdvanceAndCheck(core_timing, 3, 400);
AdvanceAndCheck(core_timing, 1, 300);
AdvanceAndCheck(core_timing, 2, 200);
AdvanceAndCheck(core_timing, 0, 200);
AdvanceAndCheck(core_timing, 4, MAX_SLICE_LENGTH);
}
namespace SharedSlotTest {
@@ -142,59 +145,63 @@ TEST_CASE("CoreTiming[SharedSlot]", "[core]") {
using namespace SharedSlotTest;
ScopeInit guard;
auto& core_timing = guard.core_timing;
CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", FifoCallback<0>);
CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", FifoCallback<1>);
CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", FifoCallback<2>);
CoreTiming::EventType* cb_d = CoreTiming::RegisterEvent("callbackD", FifoCallback<3>);
CoreTiming::EventType* cb_e = CoreTiming::RegisterEvent("callbackE", FifoCallback<4>);
Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", FifoCallback<0>);
Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", FifoCallback<1>);
Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", FifoCallback<2>);
Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", FifoCallback<3>);
Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", FifoCallback<4>);
CoreTiming::ScheduleEvent(1000, cb_a, CB_IDS[0]);
CoreTiming::ScheduleEvent(1000, cb_b, CB_IDS[1]);
CoreTiming::ScheduleEvent(1000, cb_c, CB_IDS[2]);
CoreTiming::ScheduleEvent(1000, cb_d, CB_IDS[3]);
CoreTiming::ScheduleEvent(1000, cb_e, CB_IDS[4]);
core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]);
core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]);
core_timing.ScheduleEvent(1000, cb_c, CB_IDS[2]);
core_timing.ScheduleEvent(1000, cb_d, CB_IDS[3]);
core_timing.ScheduleEvent(1000, cb_e, CB_IDS[4]);
// Enter slice 0
CoreTiming::Advance();
REQUIRE(1000 == CoreTiming::GetDowncount());
core_timing.Advance();
REQUIRE(1000 == core_timing.GetDowncount());
callbacks_ran_flags = 0;
counter = 0;
lateness = 0;
CoreTiming::AddTicks(CoreTiming::GetDowncount());
CoreTiming::Advance();
REQUIRE(MAX_SLICE_LENGTH == CoreTiming::GetDowncount());
core_timing.AddTicks(core_timing.GetDowncount());
core_timing.Advance();
REQUIRE(MAX_SLICE_LENGTH == core_timing.GetDowncount());
REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong());
}
TEST_CASE("CoreTiming[PredictableLateness]", "[core]") {
TEST_CASE("Core::Timing[PredictableLateness]", "[core]") {
ScopeInit guard;
auto& core_timing = guard.core_timing;
CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);
Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
// Enter slice 0
CoreTiming::Advance();
core_timing.Advance();
CoreTiming::ScheduleEvent(100, cb_a, CB_IDS[0]);
CoreTiming::ScheduleEvent(200, cb_b, CB_IDS[1]);
core_timing.ScheduleEvent(100, cb_a, CB_IDS[0]);
core_timing.ScheduleEvent(200, cb_b, CB_IDS[1]);
AdvanceAndCheck(0, 90, 10, -10); // (100 - 10)
AdvanceAndCheck(1, MAX_SLICE_LENGTH, 50, -50);
AdvanceAndCheck(core_timing, 0, 90, 10, -10); // (100 - 10)
AdvanceAndCheck(core_timing, 1, MAX_SLICE_LENGTH, 50, -50);
}
namespace ChainSchedulingTest {
static int reschedules = 0;
static void RescheduleCallback(u64 userdata, s64 cycles_late) {
static void RescheduleCallback(Core::Timing::CoreTiming& core_timing, u64 userdata,
s64 cycles_late) {
--reschedules;
REQUIRE(reschedules >= 0);
REQUIRE(lateness == cycles_late);
if (reschedules > 0)
CoreTiming::ScheduleEvent(1000, reinterpret_cast<CoreTiming::EventType*>(userdata),
if (reschedules > 0) {
core_timing.ScheduleEvent(1000, reinterpret_cast<Core::Timing::EventType*>(userdata),
userdata);
}
}
} // namespace ChainSchedulingTest
@@ -202,36 +209,39 @@ TEST_CASE("CoreTiming[ChainScheduling]", "[core]") {
using namespace ChainSchedulingTest;
ScopeInit guard;
auto& core_timing = guard.core_timing;
CoreTiming::EventType* cb_a = CoreTiming::RegisterEvent("callbackA", CallbackTemplate<0>);
CoreTiming::EventType* cb_b = CoreTiming::RegisterEvent("callbackB", CallbackTemplate<1>);
CoreTiming::EventType* cb_c = CoreTiming::RegisterEvent("callbackC", CallbackTemplate<2>);
CoreTiming::EventType* cb_rs =
CoreTiming::RegisterEvent("callbackReschedule", RescheduleCallback);
Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
Core::Timing::EventType* cb_rs = core_timing.RegisterEvent(
"callbackReschedule", [&core_timing](u64 userdata, s64 cycles_late) {
RescheduleCallback(core_timing, userdata, cycles_late);
});
// Enter slice 0
CoreTiming::Advance();
core_timing.Advance();
CoreTiming::ScheduleEvent(800, cb_a, CB_IDS[0]);
CoreTiming::ScheduleEvent(1000, cb_b, CB_IDS[1]);
CoreTiming::ScheduleEvent(2200, cb_c, CB_IDS[2]);
CoreTiming::ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs));
REQUIRE(800 == CoreTiming::GetDowncount());
core_timing.ScheduleEvent(800, cb_a, CB_IDS[0]);
core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]);
core_timing.ScheduleEvent(2200, cb_c, CB_IDS[2]);
core_timing.ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs));
REQUIRE(800 == core_timing.GetDowncount());
reschedules = 3;
AdvanceAndCheck(0, 200); // cb_a
AdvanceAndCheck(1, 1000); // cb_b, cb_rs
AdvanceAndCheck(core_timing, 0, 200); // cb_a
AdvanceAndCheck(core_timing, 1, 1000); // cb_b, cb_rs
REQUIRE(2 == reschedules);
CoreTiming::AddTicks(CoreTiming::GetDowncount());
CoreTiming::Advance(); // cb_rs
core_timing.AddTicks(core_timing.GetDowncount());
core_timing.Advance(); // cb_rs
REQUIRE(1 == reschedules);
REQUIRE(200 == CoreTiming::GetDowncount());
REQUIRE(200 == core_timing.GetDowncount());
AdvanceAndCheck(2, 800); // cb_c
AdvanceAndCheck(core_timing, 2, 800); // cb_c
CoreTiming::AddTicks(CoreTiming::GetDowncount());
CoreTiming::Advance(); // cb_rs
core_timing.AddTicks(core_timing.GetDowncount());
core_timing.Advance(); // cb_rs
REQUIRE(0 == reschedules);
REQUIRE(MAX_SLICE_LENGTH == CoreTiming::GetDowncount());
REQUIRE(MAX_SLICE_LENGTH == core_timing.GetDowncount());
}
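RescheduleCallback now needs a CoreTiming&, but RegisterEvent still expects a plain (u64, s64) callback, so the test bridges the two with a capturing lambda. The adapter idiom on its own (sketch):
// Sketch: adapt a dependency-taking function to the fixed event signature.
// The captured reference must outlive the registered event.
Core::Timing::EventType* const cb = core_timing.RegisterEvent(
    "callbackReschedule", [&core_timing](u64 userdata, s64 cycles_late) {
        RescheduleCallback(core_timing, userdata, cycles_late);
    });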

View File

@@ -5,12 +5,12 @@ add_library(video_core STATIC
debug_utils/debug_utils.h
engines/fermi_2d.cpp
engines/fermi_2d.h
engines/kepler_compute.cpp
engines/kepler_compute.h
engines/kepler_memory.cpp
engines/kepler_memory.h
engines/maxwell_3d.cpp
engines/maxwell_3d.h
engines/maxwell_compute.cpp
engines/maxwell_compute.h
engines/maxwell_dma.cpp
engines/maxwell_dma.h
engines/shader_bytecode.h
@@ -101,6 +101,16 @@ add_library(video_core STATIC
video_core.h
)
if (ENABLE_VULKAN)
target_sources(video_core PRIVATE
renderer_vulkan/declarations.h
renderer_vulkan/vk_device.cpp
renderer_vulkan/vk_device.h)
target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)
endif()
create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)

View File

@@ -0,0 +1,34 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/logging/log.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/memory_manager.h"
namespace Tegra::Engines {
KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {}
KeplerCompute::~KeplerCompute() = default;
void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
ASSERT_MSG(method_call.method < Regs::NUM_REGS,
"Invalid KeplerCompute register, increase the size of the Regs structure");
regs.reg_array[method_call.method] = method_call.argument;
switch (method_call.method) {
case KEPLER_COMPUTE_REG_INDEX(launch):
// Abort execution since compute shaders can be used to alter game memory (e.g. CUDA
// kernels)
UNREACHABLE_MSG("Compute shaders are not implemented");
break;
default:
break;
}
}
} // namespace Tegra::Engines

View File

@@ -10,47 +10,48 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace Tegra::Engines {
#define MAXWELL_COMPUTE_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::MaxwellCompute::Regs, field_name) / sizeof(u32))
#define KEPLER_COMPUTE_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
class MaxwellCompute final {
class KeplerCompute final {
public:
MaxwellCompute() = default;
~MaxwellCompute() = default;
explicit KeplerCompute(MemoryManager& memory_manager);
~KeplerCompute();
static constexpr std::size_t NumConstBuffers = 8;
struct Regs {
static constexpr std::size_t NUM_REGS = 0xCF8;
union {
struct {
INSERT_PADDING_WORDS(0x281);
INSERT_PADDING_WORDS(0xAF);
union {
u32 compute_end;
BitField<0, 1, u32> unknown;
} compute;
u32 launch;
INSERT_PADDING_WORDS(0xA76);
INSERT_PADDING_WORDS(0xC48);
};
std::array<u32, NUM_REGS> reg_array;
};
} regs{};
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
"MaxwellCompute Regs has wrong size");
"KeplerCompute Regs has wrong size");
MemoryManager& memory_manager;
/// Write the value to the register identified by method.
void CallMethod(const GPU::MethodCall& method_call);
};
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(MaxwellCompute::Regs, field_name) == position * 4, \
static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
ASSERT_REG_POSITION(compute, 0x281);
ASSERT_REG_POSITION(launch, 0xAF);
#undef ASSERT_REG_POSITION
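The register map arithmetic checks out: launch follows 0xAF padding words, so its byte offset is 0xAF * 4 = 0x2BC, which is exactly what ASSERT_REG_POSITION(launch, 0xAF) encodes, and 0xAF + 1 + 0xC48 = 0xCF8 accounts for every word of NUM_REGS. Spelled out (sketch):
// Sketch: the layout invariants behind the macros above.
static_assert(0xAF + 1 + 0xC48 == 0xCF8, "padding + launch must fill NUM_REGS exactly");
// KEPLER_COMPUTE_REG_INDEX(launch) == offsetof(Regs, launch) / sizeof(u32) == 0xAF,
// the word index matched by KeplerCompute::CallMethod's switch.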

View File

@@ -317,7 +317,7 @@ void Maxwell3D::ProcessQueryGet() {
LongQueryResult query_result{};
query_result.value = result;
// TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
query_result.timestamp = CoreTiming::GetTicks();
query_result.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
Memory::WriteBlock(*address, &query_result, sizeof(query_result));
}
dirty_flags.OnMemoryWrite();

View File

@@ -1,28 +0,0 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/logging/log.h"
#include "core/core.h"
#include "video_core/engines/maxwell_compute.h"
namespace Tegra::Engines {
void MaxwellCompute::CallMethod(const GPU::MethodCall& method_call) {
ASSERT_MSG(method_call.method < Regs::NUM_REGS,
"Invalid MaxwellCompute register, increase the size of the Regs structure");
regs.reg_array[method_call.method] = method_call.argument;
switch (method_call.method) {
case MAXWELL_COMPUTE_REG_INDEX(compute): {
LOG_CRITICAL(HW_GPU, "Compute shaders are not implemented");
UNREACHABLE();
break;
}
default:
break;
}
}
} // namespace Tegra::Engines

View File

@@ -186,7 +186,7 @@ enum class SubOp : u64 {
};
enum class F2iRoundingOp : u64 {
None = 0,
RoundEven = 0,
Floor = 1,
Ceil = 2,
Trunc = 3,

View File

@@ -3,12 +3,13 @@
// Refer to the license.txt file included.
#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/memory.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_compute.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/gpu.h"
#include "video_core/rasterizer_interface.h"
@@ -18,6 +19,7 @@ namespace Tegra {
u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
switch (format) {
case PixelFormat::ABGR8:
case PixelFormat::BGRA8:
return 4;
default:
return 4;
@@ -31,7 +33,7 @@ GPU::GPU(VideoCore::RasterizerInterface& rasterizer) {
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager);
}
@@ -245,8 +247,8 @@ void GPU::CallEngineMethod(const MethodCall& method_call) {
case EngineID::MAXWELL_B:
maxwell_3d->CallMethod(method_call);
break;
case EngineID::MAXWELL_COMPUTE_B:
maxwell_compute->CallMethod(method_call);
case EngineID::KEPLER_COMPUTE_B:
kepler_compute->CallMethod(method_call);
break;
case EngineID::MAXWELL_DMA_COPY_A:
maxwell_dma->CallMethod(method_call);
@@ -282,7 +284,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {
block.sequence = regs.semaphore_sequence;
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming
block.timestamp = CoreTiming::GetTicks();
block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
Memory::WriteBlock(*address, &block, sizeof(block));
} else {
const auto address =

View File

@@ -80,6 +80,7 @@ class DebugContext;
struct FramebufferConfig {
enum class PixelFormat : u32 {
ABGR8 = 1,
BGRA8 = 5,
};
/**
@@ -102,15 +103,15 @@ struct FramebufferConfig {
namespace Engines {
class Fermi2D;
class Maxwell3D;
class MaxwellCompute;
class MaxwellDMA;
class KeplerCompute;
class KeplerMemory;
} // namespace Engines
enum class EngineID {
FERMI_TWOD_A = 0x902D, // 2D Engine
MAXWELL_B = 0xB197, // 3D Engine
MAXWELL_COMPUTE_B = 0xB1C0,
KEPLER_COMPUTE_B = 0xB1C0,
KEPLER_INLINE_TO_MEMORY_B = 0xA140,
MAXWELL_DMA_COPY_A = 0xB0B5,
};
@@ -208,7 +209,7 @@ private:
/// 2D engine
std::unique_ptr<Engines::Fermi2D> fermi_2d;
/// Compute engine
std::unique_ptr<Engines::MaxwellCompute> maxwell_compute;
std::unique_ptr<Engines::KeplerCompute> kepler_compute;
/// DMA engine
std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
/// Inline memory engine

View File

@@ -125,6 +125,9 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
if (!params.is_tiled) {
params.pitch = config.tic.Pitch();
}
params.unaligned_height = config.tic.Height();
params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
params.identity = SurfaceClass::Uploaded;
@@ -191,7 +194,13 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
params.component_type = ComponentTypeFromRenderTarget(config.format);
params.type = GetFormatType(params.pixel_format);
params.width = config.width;
if (params.is_tiled) {
params.width = config.width;
} else {
params.pitch = config.width;
const u32 bpp = params.GetFormatBpp() / 8;
params.width = params.pitch / bpp;
}
params.height = config.height;
params.unaligned_height = config.height;
params.target = SurfaceTarget::Texture2D;
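For linear (non-tiled) render targets the incoming config.width actually carries the row pitch in bytes, so the texel width has to be recovered by dividing by bytes per pixel. A worked example with hypothetical numbers:
// Sketch: recovering texel width from a byte pitch on an ABGR8 target.
const u32 pitch = 1024;                  // bytes per row (config.width on linear targets)
const u32 bpp = 32 / 8;                  // ABGR8 is 32 bits per pixel, so 4 bytes
const u32 width_in_texels = pitch / bpp; // 1024 / 4 == 256 texels per row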
@@ -694,9 +703,20 @@ void CachedSurface::LoadGLBuffer() {
for (u32 i = 0; i < params.max_mip_level; i++)
SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i);
} else {
const auto texture_src_data{Memory::GetPointer(params.addr)};
const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl};
gl_buffer[0].assign(texture_src_data, texture_src_data_end);
const u32 bpp = params.GetFormatBpp() / 8;
const u32 copy_size = params.width * bpp;
if (params.pitch == copy_size) {
std::memcpy(gl_buffer[0].data(), Memory::GetPointer(params.addr),
params.size_in_bytes_gl);
} else {
const u8* start = Memory::GetPointer(params.addr);
u8* write_to = gl_buffer[0].data();
for (u32 h = params.height; h > 0; h--) {
std::memcpy(write_to, start, copy_size);
start += params.pitch;
write_to += copy_size;
}
}
}
for (u32 i = 0; i < params.max_mip_level; i++) {
ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i),
@@ -733,7 +753,19 @@ void CachedSurface::FlushGLBuffer() {
SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0);
} else {
std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes());
const u32 bpp = params.GetFormatBpp() / 8;
const u32 copy_size = params.width * bpp;
if (params.pitch == copy_size) {
std::memcpy(Memory::GetPointer(params.addr), gl_buffer[0].data(), GetSizeInBytes());
} else {
u8* start = Memory::GetPointer(params.addr);
const u8* read_to = gl_buffer[0].data();
for (u32 h = params.height; h > 0; h--) {
std::memcpy(start, read_to, copy_size);
start += params.pitch;
read_to += copy_size;
}
}
}
}
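LoadGLBuffer and FlushGLBuffer now mirror each other: a single memcpy when the pitch equals the packed row size, a per-row loop otherwise. The two loops differ only in copy direction and could share one helper; a sketch of that factoring (hypothetical name, assuming <cstring>):
// Sketch: one pitched-row copy covering both the load and flush directions.
static void CopyPitchedRows(u8* dst, std::size_t dst_stride, const u8* src,
                            std::size_t src_stride, std::size_t row_bytes, u32 rows) {
    if (dst_stride == row_bytes && src_stride == row_bytes) {
        std::memcpy(dst, src, row_bytes * rows); // rows are already contiguous
        return;
    }
    for (u32 h = 0; h < rows; ++h) {
        std::memcpy(dst, src, row_bytes);
        dst += dst_stride;
        src += src_stride;
    }
}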
@@ -859,8 +891,8 @@ void CachedSurface::EnsureTextureView() {
constexpr GLuint min_level = 0;
glGenTextures(1, &texture_view.handle);
glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, 0,
params.max_mip_level, 0, 1);
glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, min_level,
params.max_mip_level, min_layer, num_layers);
ApplyTextureDefaults(texture_view.handle, params.max_mip_level);
glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA,
reinterpret_cast<const GLint*>(swizzle.data()));

View File

@@ -8,6 +8,7 @@
#include <map>
#include <memory>
#include <string>
#include <unordered_set>
#include <vector>
#include "common/alignment.h"
@@ -35,7 +36,6 @@ using PixelFormat = VideoCore::Surface::PixelFormat;
using ComponentType = VideoCore::Surface::ComponentType;
struct SurfaceParams {
enum class SurfaceClass {
Uploaded,
RenderTarget,
@@ -168,20 +168,27 @@ struct SurfaceParams {
}
u32 MipBlockDepth(u32 mip_level) const {
if (mip_level == 0)
if (mip_level == 0) {
return block_depth;
if (is_layered)
}
if (is_layered) {
return 1;
u32 depth = MipDepth(mip_level);
}
const u32 mip_depth = MipDepth(mip_level);
u32 bd = 32;
while (bd > 1 && depth * 2 <= bd) {
while (bd > 1 && mip_depth * 2 <= bd) {
bd >>= 1;
}
if (bd == 32) {
u32 bh = MipBlockHeight(mip_level);
if (bh >= 4)
const u32 bh = MipBlockHeight(mip_level);
if (bh >= 4) {
return 16;
}
}
return bd;
}
@@ -272,6 +279,7 @@ struct SurfaceParams {
u32 height;
u32 depth;
u32 unaligned_height;
u32 pitch;
SurfaceTarget target;
SurfaceClass identity;
u32 max_mip_level;

View File

@@ -171,7 +171,7 @@ public:
code.AddLine(fmt::format("case 0x{:x}u: {{", address));
++code.scope;
VisitBasicBlock(bb);
VisitBlock(bb);
--code.scope;
code.AddLine('}');
@@ -423,7 +423,7 @@ private:
code.AddNewLine();
}
void VisitBasicBlock(const BasicBlock& bb) {
void VisitBlock(const NodeBlock& bb) {
for (const Node node : bb) {
if (const std::string expr = Visit(node); !expr.empty()) {
code.AddLine(expr);
@@ -575,7 +575,7 @@ private:
code.AddLine("if (" + Visit(conditional->GetCondition()) + ") {");
++code.scope;
VisitBasicBlock(conditional->GetCode());
VisitBlock(conditional->GetCode());
--code.scope;
code.AddLine('}');
@@ -616,17 +616,8 @@ private:
std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
std::string value = VisitOperand(operation, operand_index);
switch (type) {
case Type::Bool:
case Type::Bool2:
case Type::Float:
return value;
case Type::Int:
return "ftoi(" + value + ')';
case Type::Uint:
return "ftou(" + value + ')';
case Type::HalfFloat:
case Type::HalfFloat: {
const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta());
if (!half_meta) {
value = "toHalf2(" + value + ')';
@@ -643,6 +634,26 @@ private:
return "vec2(toHalf2(" + value + ")[1])";
}
}
default:
return CastOperand(value, type);
}
}
std::string CastOperand(const std::string& value, Type type) const {
switch (type) {
case Type::Bool:
case Type::Bool2:
case Type::Float:
return value;
case Type::Int:
return "ftoi(" + value + ')';
case Type::Uint:
return "ftou(" + value + ')';
case Type::HalfFloat:
// Can't be handled as a stand-alone value
UNREACHABLE();
return value;
}
UNREACHABLE();
return value;
}
@@ -650,6 +661,7 @@ private:
std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) {
switch (type) {
case Type::Bool:
case Type::Bool2:
case Type::Float:
if (needs_parenthesis) {
return '(' + value + ')';
@@ -719,45 +731,51 @@ private:
constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
const auto count = static_cast<u32>(operation.GetOperandsCount());
ASSERT(meta);
const std::size_t count = operation.GetOperandsCount();
const bool has_array = meta->sampler.IsArray();
const bool has_shadow = meta->sampler.IsShadow();
std::string expr = func;
expr += '(';
expr += GetSampler(meta->sampler);
expr += ", ";
expr += coord_constructors[meta->coords_count - 1];
expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
expr += '(';
for (u32 i = 0; i < count; ++i) {
const bool is_extra = i >= meta->coords_count;
const bool is_array = i == meta->array_index;
for (std::size_t i = 0; i < count; ++i) {
expr += Visit(operation[i]);
std::string operand = [&]() {
if (is_extra && is_extra_int) {
if (const auto immediate = std::get_if<ImmediateNode>(operation[i])) {
return std::to_string(static_cast<s32>(immediate->GetValue()));
} else {
return "ftoi(" + Visit(operation[i]) + ')';
}
} else {
return Visit(operation[i]);
}
}();
if (is_array) {
ASSERT(!is_extra);
operand = "float(ftoi(" + operand + "))";
}
expr += operand;
if (i + 1 == meta->coords_count) {
expr += ')';
}
if (i + 1 < count) {
const std::size_t next = i + 1;
if (next < count || has_array || has_shadow)
expr += ", ";
}
if (has_array) {
expr += "float(ftoi(" + Visit(meta->array) + "))";
}
if (has_shadow) {
if (has_array)
expr += ", ";
expr += Visit(meta->depth_compare);
}
expr += ')';
for (const Node extra : meta->extras) {
expr += ", ";
if (is_extra_int) {
if (const auto immediate = std::get_if<ImmediateNode>(extra)) {
// Inline the string as an immediate integer in GLSL (some extra arguments are
// required to be constant)
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
} else {
expr += "ftoi(" + Visit(extra) + ')';
}
} else {
expr += Visit(extra);
}
}
expr += ')';
return expr;
}
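The rebuilt emitter sizes the coordinate constructor as count + has_array + has_shadow, folds the array layer in as float(ftoi(layer)) and the shadow reference as a plain float, then appends any extras as separate arguments, with the immediate-integer special case applying only to those extras. For a 2D array shadow sampler the produced GLSL would therefore read roughly texture(samp, vec4(x, y, float(ftoi(layer)), depth_compare)) (illustrative).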
@@ -1134,7 +1152,7 @@ private:
Type::HalfFloat);
}
std::string F4Texture(Operation operation) {
std::string Texture(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
@@ -1145,7 +1163,7 @@ private:
return expr + GetSwizzle(meta->element);
}
std::string F4TextureLod(Operation operation) {
std::string TextureLod(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
@@ -1156,7 +1174,7 @@ private:
return expr + GetSwizzle(meta->element);
}
std::string F4TextureGather(Operation operation) {
std::string TextureGather(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
@@ -1164,7 +1182,7 @@ private:
GetSwizzle(meta->element);
}
std::string F4TextureQueryDimensions(Operation operation) {
std::string TextureQueryDimensions(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
@@ -1184,7 +1202,7 @@ private:
return "0";
}
std::string F4TextureQueryLod(Operation operation) {
std::string TextureQueryLod(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
@@ -1195,29 +1213,33 @@ private:
return "0";
}
std::string F4TexelFetch(Operation operation) {
std::string TexelFetch(Operation operation) {
constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"};
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
const auto count = static_cast<u32>(operation.GetOperandsCount());
ASSERT(meta);
UNIMPLEMENTED_IF(meta->sampler.IsArray());
const std::size_t count = operation.GetOperandsCount();
std::string expr = "texelFetch(";
expr += GetSampler(meta->sampler);
expr += ", ";
expr += constructors[meta->coords_count - 1];
expr += constructors.at(operation.GetOperandsCount() - 1);
expr += '(';
for (u32 i = 0; i < count; ++i) {
for (std::size_t i = 0; i < count; ++i) {
expr += VisitOperand(operation, i, Type::Int);
if (i + 1 == meta->coords_count) {
const std::size_t next = i + 1;
if (next == count)
expr += ')';
}
if (i + 1 < count) {
else if (next < count)
expr += ", ";
}
}
for (std::size_t i = 0; i < meta->extras.size(); ++i) {
expr += ", ";
expr += CastOperand(Visit(meta->extras.at(i)), Type::Int);
}
expr += ')';
return expr + GetSwizzle(meta->element);
}
@@ -1454,12 +1476,12 @@ private:
&GLSLDecompiler::Logical2HNotEqual,
&GLSLDecompiler::Logical2HGreaterEqual,
&GLSLDecompiler::F4Texture,
&GLSLDecompiler::F4TextureLod,
&GLSLDecompiler::F4TextureGather,
&GLSLDecompiler::F4TextureQueryDimensions,
&GLSLDecompiler::F4TextureQueryLod,
&GLSLDecompiler::F4TexelFetch,
&GLSLDecompiler::Texture,
&GLSLDecompiler::TextureLod,
&GLSLDecompiler::TextureGather,
&GLSLDecompiler::TextureQueryDimensions,
&GLSLDecompiler::TextureQueryLod,
&GLSLDecompiler::TexelFetch,
&GLSLDecompiler::Branch,
&GLSLDecompiler::PushFlowStack,

View File

@@ -107,7 +107,7 @@ RendererOpenGL::~RendererOpenGL() = default;
void RendererOpenGL::SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
Core::System::GetInstance().GetPerfStats().EndSystemFrame();
system.GetPerfStats().EndSystemFrame();
// Maintain the rasterizer's state as a priority
OpenGLState prev_state = OpenGLState::GetCurState();
@@ -137,8 +137,8 @@ void RendererOpenGL::SwapBuffers(
render_window.PollEvents();
Core::System::GetInstance().FrameLimiter().DoFrameLimiting(CoreTiming::GetGlobalTimeUs());
Core::System::GetInstance().GetPerfStats().BeginSystemFrame();
system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs());
system.GetPerfStats().BeginSystemFrame();
// Restore the rasterizer state
prev_state.Apply();

View File

@@ -0,0 +1,45 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vulkan/vulkan.hpp>
namespace Vulkan {
// vulkan.hpp unique handles use DispatchLoaderStatic
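These aliases exist because vulkan.hpp's stock unique handles (vk::UniqueBuffer and friends) are bound to DispatchLoaderStatic, which resolves entry points at link time; re-parameterising them on vk::DispatchLoaderDynamic makes destruction go through the same dynamically loaded function pointers the backend uses everywhere else. In use it mirrors what vk_device.cpp below does with its logical device (sketch, raw_device being an already created vk::Device):
// Sketch: the deleter stored in the handle carries the dynamic loader, so the
// destructor calls vkDestroyDevice through it rather than the static loader.
vk::DispatchLoaderDynamic dld; // initialised elsewhere from the instance/device
Vulkan::UniqueDevice device(
    raw_device, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));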
template <typename T>
using UniqueHandle = vk::UniqueHandle<T, vk::DispatchLoaderDynamic>;
using UniqueAccelerationStructureNV = UniqueHandle<vk::AccelerationStructureNV>;
using UniqueBuffer = UniqueHandle<vk::Buffer>;
using UniqueBufferView = UniqueHandle<vk::BufferView>;
using UniqueCommandBuffer = UniqueHandle<vk::CommandBuffer>;
using UniqueCommandPool = UniqueHandle<vk::CommandPool>;
using UniqueDescriptorPool = UniqueHandle<vk::DescriptorPool>;
using UniqueDescriptorSet = UniqueHandle<vk::DescriptorSet>;
using UniqueDescriptorSetLayout = UniqueHandle<vk::DescriptorSetLayout>;
using UniqueDescriptorUpdateTemplate = UniqueHandle<vk::DescriptorUpdateTemplate>;
using UniqueDevice = UniqueHandle<vk::Device>;
using UniqueDeviceMemory = UniqueHandle<vk::DeviceMemory>;
using UniqueEvent = UniqueHandle<vk::Event>;
using UniqueFence = UniqueHandle<vk::Fence>;
using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>;
using UniqueImage = UniqueHandle<vk::Image>;
using UniqueImageView = UniqueHandle<vk::ImageView>;
using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>;
using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>;
using UniquePipeline = UniqueHandle<vk::Pipeline>;
using UniquePipelineCache = UniqueHandle<vk::PipelineCache>;
using UniquePipelineLayout = UniqueHandle<vk::PipelineLayout>;
using UniqueQueryPool = UniqueHandle<vk::QueryPool>;
using UniqueRenderPass = UniqueHandle<vk::RenderPass>;
using UniqueSampler = UniqueHandle<vk::Sampler>;
using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>;
using UniqueSemaphore = UniqueHandle<vk::Semaphore>;
using UniqueShaderModule = UniqueHandle<vk::ShaderModule>;
using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>;
using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>;
} // namespace Vulkan

View File

@@ -0,0 +1,231 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <map>
#include <optional>
#include <set>
#include <vector>
#include "common/assert.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
namespace Vulkan {
namespace Alternatives {
constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = {
vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}};
constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = {
vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}};
} // namespace Alternatives
constexpr const vk::Format* GetFormatAlternatives(vk::Format format) {
switch (format) {
case vk::Format::eD24UnormS8Uint:
return Alternatives::Depth24UnormS8Uint.data();
case vk::Format::eD16UnormS8Uint:
return Alternatives::Depth16UnormS8Uint.data();
default:
return nullptr;
}
}
constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties,
FormatType format_type) {
switch (format_type) {
case FormatType::Linear:
return properties.linearTilingFeatures;
case FormatType::Optimal:
return properties.optimalTilingFeatures;
case FormatType::Buffer:
return properties.bufferFeatures;
default:
return {};
}
}
VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface)
: physical{physical}, format_properties{GetFormatProperties(dldi, physical)} {
SetupFamilies(dldi, surface);
SetupProperties(dldi);
}
VKDevice::~VKDevice() = default;
bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) {
const auto queue_cis = GetDeviceQueueCreateInfos();
vk::PhysicalDeviceFeatures device_features{};
const std::vector<const char*> extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME};
const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(),
0, nullptr, static_cast<u32>(extensions.size()),
extensions.data(), &device_features);
vk::Device dummy_logical;
if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!");
return false;
}
dld.init(instance, dldi.vkGetInstanceProcAddr, dummy_logical, dldi.vkGetDeviceProcAddr);
logical = UniqueDevice(
dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));
graphics_queue = logical->getQueue(graphics_family, 0, dld);
present_queue = logical->getQueue(present_family, 0, dld);
return true;
}
vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
vk::FormatFeatureFlags wanted_usage,
FormatType format_type) const {
if (IsFormatSupported(wanted_format, wanted_usage, format_type)) {
return wanted_format;
}
// The wanted format is not supported by hardware, search for alternatives
const vk::Format* alternatives = GetFormatAlternatives(wanted_format);
if (alternatives == nullptr) {
LOG_CRITICAL(Render_Vulkan,
"Format={} with usage={} and type={} has no defined alternatives and host "
"hardware does not support it",
static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
static_cast<u32>(format_type));
UNREACHABLE();
return wanted_format;
}
std::size_t i = 0;
for (vk::Format alternative = alternatives[0]; alternative != vk::Format{};
alternative = alternatives[++i]) {
if (!IsFormatSupported(alternative, wanted_usage, format_type))
continue;
LOG_WARNING(Render_Vulkan,
"Emulating format={} with alternative format={} with usage={} and type={}",
static_cast<u32>(wanted_format), static_cast<u32>(alternative),
static_cast<u32>(wanted_usage), static_cast<u32>(format_type));
return alternative;
}
// No alternatives found, panic
LOG_CRITICAL(Render_Vulkan,
"Format={} with usage={} and type={} is not supported by the host hardware and "
"doesn't support any of the alternatives",
static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
static_cast<u32>(format_type));
UNREACHABLE();
return wanted_format;
}
bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
FormatType format_type) const {
const auto it = format_properties.find(wanted_format);
if (it == format_properties.end()) {
LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}",
static_cast<u32>(wanted_format));
UNREACHABLE();
return true;
}
const vk::FormatFeatureFlags supported_usage = GetFormatFeatures(it->second, format_type);
return (supported_usage & wanted_usage) == wanted_usage;
}
bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface) {
const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME;
bool has_swapchain{};
for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
has_swapchain |= prop.extensionName == swapchain_extension;
}
if (!has_swapchain) {
// The device doesn't support creating swapchains.
return false;
}
bool has_graphics{}, has_present{};
const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
const auto& family = queue_family_properties[i];
if (family.queueCount == 0)
continue;
has_graphics |=
(family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0);
has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0;
}
if (!has_graphics || !has_present) {
        // The device is missing a graphics queue or a present queue.
return false;
}
    // TODO(Rodrigo): Check if the device matches all requirements.
const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
if (props.limits.maxUniformBufferRange < 65536) {
return false;
}
// Device is suitable.
return true;
}
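Since IsSuitable is a static predicate, device selection can live outside the class. A hedged sketch of the enumeration loop a frontend might run (the helper name is hypothetical):

#include <optional>

std::optional<vk::PhysicalDevice> PickPhysicalDevice(const vk::DispatchLoaderDynamic& dldi,
                                                     vk::Instance instance,
                                                     vk::SurfaceKHR surface) {
    for (const vk::PhysicalDevice physical : instance.enumeratePhysicalDevices(dldi)) {
        // First suitable device wins; a real frontend might prefer discrete GPUs.
        if (VKDevice::IsSuitable(dldi, physical, surface)) {
            return physical;
        }
    }
    return std::nullopt;
}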
void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) {
std::optional<u32> graphics_family_, present_family_;
const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
if (graphics_family_ && present_family_)
break;
const auto& queue_family = queue_family_properties[i];
if (queue_family.queueCount == 0)
continue;
if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics)
graphics_family_ = i;
if (physical.getSurfaceSupportKHR(i, surface, dldi))
present_family_ = i;
}
ASSERT(graphics_family_ && present_family_);
graphics_family = *graphics_family_;
present_family = *present_family_;
}
void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) {
const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
device_type = props.deviceType;
uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment);
}
std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
static const float QUEUE_PRIORITY = 1.f;
std::set<u32> unique_queue_families = {graphics_family, present_family};
std::vector<vk::DeviceQueueCreateInfo> queue_cis;
for (u32 queue_family : unique_queue_families)
queue_cis.push_back({{}, queue_family, 1, &QUEUE_PRIORITY});
return queue_cis;
}
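A detail worth calling out: requesting the same queue family twice in one vk::DeviceCreateInfo is invalid, and on many GPUs the graphics and present families share an index. The std::set above collapses that case to a single create info. A tiny self-contained illustration:

#include <cassert>
#include <set>

int main() {
    const unsigned graphics_family = 0;
    const unsigned present_family = 0;  // same family as graphics on this GPU
    const std::set<unsigned> unique_families{graphics_family, present_family};
    assert(unique_families.size() == 1);  // only one queue create info is emitted
    return 0;
}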
std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
std::map<vk::Format, vk::FormatProperties> format_properties;
const auto AddFormatQuery = [&format_properties, &dldi, physical](vk::Format format) {
format_properties.emplace(format, physical.getFormatProperties(format, dldi));
};
AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32);
AddFormatQuery(vk::Format::eR5G6B5UnormPack16);
AddFormatQuery(vk::Format::eD32Sfloat);
AddFormatQuery(vk::Format::eD16UnormS8Uint);
AddFormatQuery(vk::Format::eD24UnormS8Uint);
AddFormatQuery(vk::Format::eD32SfloatS8Uint);
return format_properties;
}
} // namespace Vulkan
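For reference, each AddFormatQuery entry boils down to a single C-API call; a hedged equivalent of one query plus the containment test that IsFormatSupported performs:

#include <vulkan/vulkan.h>

bool SupportsOptimalDepthStencil(VkPhysicalDevice physical, VkFormat format) {
    VkFormatProperties properties;
    vkGetPhysicalDeviceFormatProperties(physical, format, &properties);
    const VkFormatFeatureFlags wanted = VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
    // All wanted bits must be present, not just some of them.
    return (properties.optimalTilingFeatures & wanted) == wanted;
}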

View File

@@ -0,0 +1,116 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <map>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
namespace Vulkan {
/// Format usage descriptor
enum class FormatType { Linear, Optimal, Buffer };
/// Handles data specific to a physical device.
class VKDevice final {
public:
explicit VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface);
~VKDevice();
/// Initializes the device. Returns true on success.
bool Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance);
/**
 * Returns a format supported by the device for the passed requirements.
 * @param wanted_format The ideal format to be returned; the actual returned format may differ.
 * @param wanted_usage The usage that the returned format must fulfill, even if wanted_format
 * itself is not supported.
 * @param format_type Format type usage.
 * @returns A format supported by the device.
*/
vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
FormatType format_type) const;
/// Returns the dispatch loader with direct function pointers of the device
const vk::DispatchLoaderDynamic& GetDispatchLoader() const {
return dld;
}
/// Returns the logical device
vk::Device GetLogical() const {
return logical.get();
}
/// Returns the physical device.
vk::PhysicalDevice GetPhysical() const {
return physical;
}
/// Returns the main graphics queue.
vk::Queue GetGraphicsQueue() const {
return graphics_queue;
}
/// Returns the main present queue.
vk::Queue GetPresentQueue() const {
return present_queue;
}
/// Returns main graphics queue family index.
u32 GetGraphicsFamily() const {
return graphics_family;
}
/// Returns main present queue family index.
u32 GetPresentFamily() const {
return present_family;
}
    /// Returns true if the device is integrated with the host CPU
bool IsIntegrated() const {
return device_type == vk::PhysicalDeviceType::eIntegratedGpu;
}
    /// Returns the uniform buffer alignment requirement
u64 GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
}
/// Checks if the physical device is suitable.
static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface);
private:
/// Sets up queue families.
void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface);
/// Sets up device properties.
void SetupProperties(const vk::DispatchLoaderDynamic& dldi);
/// Returns a list of queue initialization descriptors.
std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
/// Returns true if a format is supported.
bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
FormatType format_type) const;
/// Returns the device properties for Vulkan formats.
static std::map<vk::Format, vk::FormatProperties> GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
const vk::PhysicalDevice physical; ///< Physical device
vk::DispatchLoaderDynamic dld; ///< Device function pointers
UniqueDevice logical; ///< Logical device
vk::Queue graphics_queue; ///< Main graphics queue
vk::Queue present_queue; ///< Main present queue
u32 graphics_family{}; ///< Main graphics queue family index
u32 present_family{}; ///< Main present queue family index
vk::PhysicalDeviceType device_type; ///< Physical device type
    u64 uniform_buffer_alignment{};        ///< Uniform buffer alignment requirement
std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary
};
} // namespace Vulkan
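Putting the interface together, the construction order it implies is: IsSuitable, then the constructor (which caches queue families and properties), then Create before touching any queue or the logical device. A hedged usage sketch, error handling abbreviated:

bool InitializeDevice(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance,
                      vk::PhysicalDevice physical, vk::SurfaceKHR surface) {
    if (!VKDevice::IsSuitable(dldi, physical, surface)) {
        return false;
    }
    VKDevice device(dldi, physical, surface);
    if (!device.Create(dldi, instance)) {
        return false;  // logical device creation failed
    }
    const vk::Queue graphics = device.GetGraphicsQueue();  // valid only after Create
    return graphics != vk::Queue{};
}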

View File

@@ -121,15 +121,15 @@ ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {
return exit_method = ExitMethod::AlwaysReturn;
}
BasicBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
BasicBlock basic_block;
NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
NodeBlock basic_block;
for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
pc = DecodeInstr(basic_block, pc);
}
return basic_block;
}
u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {
u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
// Ignore sched instructions when generating code.
if (IsSchedInstruction(pc, main_offset)) {
return pc + 1;
@@ -151,39 +151,38 @@ u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
"NeverExecute predicate not implemented");
static const std::map<OpCode::Type, u32 (ShaderIR::*)(BasicBlock&, const BasicBlock&, u32)>
decoders = {
{OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
{OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
{OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
{OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
{OpCode::Type::Shift, &ShaderIR::DecodeShift},
{OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
{OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
{OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
{OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
{OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
{OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
{OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
{OpCode::Type::Memory, &ShaderIR::DecodeMemory},
{OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
{OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
{OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
{OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
{OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
{OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
{OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
{OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
{OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
{OpCode::Type::Video, &ShaderIR::DecodeVideo},
{OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
};
static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = {
{OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
{OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
{OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
{OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
{OpCode::Type::Shift, &ShaderIR::DecodeShift},
{OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
{OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
{OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
{OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
{OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
{OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
{OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
{OpCode::Type::Memory, &ShaderIR::DecodeMemory},
{OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
{OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
{OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
{OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
{OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
{OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
{OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
{OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
{OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
{OpCode::Type::Video, &ShaderIR::DecodeVideo},
{OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
};
std::vector<Node> tmp_block;
if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) {
pc = (this->*decoder->second)(tmp_block, bb, pc);
pc = (this->*decoder->second)(tmp_block, pc);
} else {
pc = DecodeOther(tmp_block, bb, pc);
pc = DecodeOther(tmp_block, pc);
}
// Some instructions (like SSY) don't have a predicate field, they are always unconditionally
@@ -192,11 +191,14 @@ u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) {
bb.push_back(
Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)));
const Node conditional =
Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block));
global_code.push_back(conditional);
bb.push_back(conditional);
} else {
for (auto& node : tmp_block) {
bb.push_back(std::move(node));
global_code.push_back(node);
bb.push_back(node);
}
}
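Two things happen in this hunk: the decoder table now maps opcode types to pointer-to-member functions with the simplified NodeBlock signature, and every generated node is mirrored into global_code, which gives helpers such as TrackCbuf (used by DecodeMemory below) a flat view of the whole program. A minimal self-contained illustration of the dispatch mechanics, with invented types:

#include <map>

struct Decoder {
    using DecodeFn = unsigned (Decoder::*)(unsigned pc);

    unsigned DecodeArithmetic(unsigned pc) { return pc + 1; }
    unsigned DecodeOther(unsigned pc) { return pc + 1; }

    unsigned Dispatch(int opcode_type, unsigned pc) {
        static const std::map<int, DecodeFn> decoders = {
            {0, &Decoder::DecodeArithmetic},
        };
        if (const auto it = decoders.find(opcode_type); it != decoders.end()) {
            return (this->*it->second)(pc);  // call through pointer-to-member
        }
        return DecodeOther(pc);  // fallback, mirroring DecodeOther above
    }
};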

View File

@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::SubOp;
u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) {
u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);

View File

@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc) {
u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);

View File

@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);

View File

@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);

View File

@@ -15,7 +15,7 @@ using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
using Tegra::Shader::Register;
u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc) {
u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
@@ -41,7 +41,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u3
const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc);
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
@@ -242,7 +242,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u3
return pc;
}
void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
Node imm_lut, bool sets_cc) {
constexpr u32 lop_iterations = 32;
const Node one = Immediate(1);
@@ -284,4 +284,4 @@ void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, No
SetRegister(bb, dest, value);
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -16,7 +16,7 @@ using Tegra::Shader::Pred;
using Tegra::Shader::PredicateResultMode;
using Tegra::Shader::Register;
u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
@@ -54,9 +54,9 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock&
return pc;
}
void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op,
Node op_a, Node op_b, PredicateResultMode predicate_mode,
Pred predicate, bool sets_cc) {
void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a,
Node op_b, PredicateResultMode predicate_mode, Pred predicate,
bool sets_cc) {
const Node result = [&]() {
switch (logic_op) {
case LogicOperation::And:

View File

@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc) {
u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);

View File

@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc) {
u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);

View File

@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
@@ -118,8 +118,8 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
value = [&]() {
switch (instr.conversion.f2i.rounding) {
case Tegra::Shader::F2iRoundingOp::None:
return value;
case Tegra::Shader::F2iRoundingOp::RoundEven:
return Operation(OperationCode::FRoundEven, PRECISE, value);
case Tegra::Shader::F2iRoundingOp::Floor:
return Operation(OperationCode::FFloor, PRECISE, value);
case Tegra::Shader::F2iRoundingOp::Ceil:
@@ -146,4 +146,4 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
return pc;
}
} // namespace VideoCommon::Shader
} // namespace VideoCommon::Shader

View File

@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) {
u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);

View File

@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);

View File

@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);

View File

@@ -14,7 +14,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);

View File

@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);

View File

@@ -16,7 +16,7 @@ using Tegra::Shader::HalfType;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) {
u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);

View File

@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);

View File

@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);

View File

@@ -36,7 +36,7 @@ static std::size_t GetCoordCount(TextureType texture_type) {
}
}
u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
@@ -160,7 +160,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
}();
const Node addr_register = GetRegister(instr.gpr8);
const Node base_address = TrackCbuf(addr_register, code, static_cast<s64>(code.size()));
const Node base_address =
TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
const auto cbuf = std::get_if<CbufNode>(base_address);
ASSERT(cbuf != nullptr);
const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
@@ -305,7 +306,6 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
case OpCode::Id::TLD4S: {
UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
}
@@ -314,9 +314,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const Node op_a = GetRegister(instr.gpr8);
const Node op_b = GetRegister(instr.gpr20);
std::vector<Node> coords;
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
std::vector<Node> coords;
if (depth_compare) {
// Note: TLD4S coordinate encoding works just like TEXS's
const Node op_y = GetRegister(instr.gpr8.Value() + 1);
@@ -327,18 +326,17 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
coords.push_back(op_a);
coords.push_back(op_b);
}
const auto num_coords = static_cast<u32>(coords.size());
coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
std::vector<Node> extras;
extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
const auto& sampler =
GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto params = coords;
MetaTexture meta{sampler, element, num_coords};
values[element] =
Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
auto coords_copy = coords;
MetaTexture meta{sampler, {}, {}, extras, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
WriteTexsInstructionFloat(bb, instr, values);
@@ -359,12 +357,13 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
switch (instr.txq.query_type) {
case Tegra::Shader::TextureQueryType::Dimension: {
for (u32 element = 0; element < 4; ++element) {
if (instr.txq.IsComponentEnabled(element)) {
MetaTexture meta{sampler, element};
const Node value = Operation(OperationCode::F4TextureQueryDimensions,
std::move(meta), GetRegister(instr.gpr8));
SetTemporal(bb, indexer++, value);
if (!instr.txq.IsComponentEnabled(element)) {
continue;
}
MetaTexture meta{sampler, {}, {}, {}, element};
const Node value =
Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
SetTemporal(bb, indexer++, value);
}
for (u32 i = 0; i < indexer; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
@@ -411,9 +410,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
for (u32 element = 0; element < 2; ++element) {
auto params = coords;
MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())};
const Node value =
Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params));
MetaTexture meta{sampler, {}, {}, {}, element};
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
SetTemporal(bb, element, value);
}
for (u32 element = 0; element < 2; ++element) {
@@ -431,7 +429,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
}
WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
@@ -464,8 +462,7 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
return *used_samplers.emplace(entry).first;
}
void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr,
const Node4& components) {
void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
u32 dest_elem = 0;
for (u32 elem = 0; elem < 4; ++elem) {
if (!instr.tex.IsComponentEnabled(elem)) {
@@ -480,7 +477,7 @@ void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr,
}
}
void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr,
void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
const Node4& components) {
// TEXS has two destination registers and a swizzle. The first two elements in the swizzle
// go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
@@ -504,7 +501,7 @@ void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr,
}
}
void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr,
void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
const Node4& components) {
    // TEXS.F16 destination registers are packed in two registers in pairs (just like any half
// float instruction).
@@ -535,15 +532,16 @@ void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr,
}
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
TextureProcessMode process_mode, bool depth_compare, bool is_array,
std::size_t array_offset, std::size_t bias_offset,
std::vector<Node>&& coords) {
UNIMPLEMENTED_IF_MSG(
(texture_type == TextureType::Texture3D && (is_array || depth_compare)) ||
(texture_type == TextureType::TextureCube && is_array && depth_compare),
"This method is not supported.");
TextureProcessMode process_mode, std::vector<Node> coords,
Node array, Node depth_compare, u32 bias_offset) {
const bool is_array = array;
const bool is_shadow = depth_compare;
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
(texture_type == TextureType::TextureCube && is_array && is_shadow),
"This method is not supported.");
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
const bool lod_needed = process_mode == TextureProcessMode::LZ ||
process_mode == TextureProcessMode::LL ||
@@ -552,35 +550,30 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
// LOD selection (either via bias or explicit textureLod) not supported in GL for
// sampler2DArrayShadow and samplerCubeArrayShadow.
const bool gl_lod_supported =
!((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) ||
(texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare));
!((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
(texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
const OperationCode read_method =
lod_needed && gl_lod_supported ? OperationCode::F4TextureLod : OperationCode::F4Texture;
lod_needed && gl_lod_supported ? OperationCode::TextureLod : OperationCode::Texture;
UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
std::optional<u32> array_offset_value;
if (is_array)
array_offset_value = static_cast<u32>(array_offset);
const auto coords_count = static_cast<u32>(coords.size());
std::vector<Node> extras;
if (process_mode != TextureProcessMode::None && gl_lod_supported) {
if (process_mode == TextureProcessMode::LZ) {
coords.push_back(Immediate(0.0f));
extras.push_back(Immediate(0.0f));
} else {
// If present, lod or bias are always stored in the register indexed by the gpr20
// field with an offset depending on the usage of the other registers
coords.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
}
}
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto params = coords;
MetaTexture meta{sampler, element, coords_count, array_offset_value};
values[element] = Operation(read_method, std::move(meta), std::move(params));
auto copy_coords = coords;
MetaTexture meta{sampler, array, depth_compare, extras, element};
values[element] = Operation(read_method, meta, std::move(copy_coords));
}
return values;
@@ -602,28 +595,22 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
for (std::size_t i = 0; i < coord_count; ++i) {
coords.push_back(GetRegister(coord_register + i));
}
// 1D.DC in opengl the 2nd component is ignored.
// 1D.DC in OpenGL the 2nd component is ignored.
if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
coords.push_back(Immediate(0.0f));
}
std::size_t array_offset{};
if (is_array) {
array_offset = coords.size();
coords.push_back(GetRegister(array_register));
}
const Node array = is_array ? GetRegister(array_register) : nullptr;
Node dc{};
if (depth_compare) {
// Depth is always stored in the register signaled by gpr20
// or in the next register if lod or bias are used
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
// or bias are used
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
coords.push_back(GetRegister(depth_register));
}
// Fill ignored coordinates
while (coords.size() < total_coord_count) {
coords.push_back(Immediate(0));
dc = GetRegister(depth_register);
}
return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
0, std::move(coords));
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
}
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
@@ -641,6 +628,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
(is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
? static_cast<u64>(instr.gpr20.Value())
: coord_register + 1;
const u32 bias_offset = coord_count > 2 ? 1 : 0;
std::vector<Node> coords;
for (std::size_t i = 0; i < coord_count; ++i) {
@@ -648,24 +636,17 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
}
std::size_t array_offset{};
if (is_array) {
array_offset = coords.size();
coords.push_back(GetRegister(array_register));
}
const Node array = is_array ? GetRegister(array_register) : nullptr;
Node dc{};
if (depth_compare) {
// Depth is always stored in the register signaled by gpr20
// or in the next register if lod or bias are used
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
// or bias are used
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
coords.push_back(GetRegister(depth_register));
}
// Fill ignored coordinates
while (coords.size() < total_coord_count) {
coords.push_back(Immediate(0));
dc = GetRegister(depth_register);
}
return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
(coord_count > 2 ? 1 : 0), std::move(coords));
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
}
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
@@ -680,24 +661,16 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
const u64 coord_register = array_register + (is_array ? 1 : 0);
std::vector<Node> coords;
for (size_t i = 0; i < coord_count; ++i) {
for (size_t i = 0; i < coord_count; ++i)
coords.push_back(GetRegister(coord_register + i));
}
std::optional<u32> array_offset;
if (is_array) {
array_offset = static_cast<u32>(coords.size());
coords.push_back(GetRegister(array_register));
}
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto params = coords;
MetaTexture meta{sampler, element, static_cast<u32>(coords.size()), array_offset};
values[element] =
Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
auto coords_copy = coords;
MetaTexture meta{sampler, GetRegister(array_register), {}, {}, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
return values;
@@ -705,7 +678,6 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
const std::size_t type_coord_count = GetCoordCount(texture_type);
const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0);
const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
    // If enabled, the array index is always stored in the gpr8 field
@@ -719,33 +691,22 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
: coord_register + 1;
std::vector<Node> coords;
for (std::size_t i = 0; i < type_coord_count; ++i) {
const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
}
std::optional<u32> array_offset;
if (is_array) {
array_offset = static_cast<u32>(coords.size());
coords.push_back(GetRegister(array_register));
}
const auto coords_count = static_cast<u32>(coords.size());
if (lod_enabled) {
// When lod is used always is in grp20
coords.push_back(GetRegister(instr.gpr20));
} else {
coords.push_back(Immediate(0));
}
const Node array = is_array ? GetRegister(array_register) : nullptr;
    // When lod is used it is always in gpr20
const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto params = coords;
MetaTexture meta{sampler, element, coords_count, array_offset};
values[element] =
Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params));
auto coords_copy = coords;
MetaTexture meta{sampler, array, {}, {lod}, element};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
return values;
}
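The MetaTexture initializers in these hunks ({sampler, array, depth_compare, extras, element}) suggest the struct traded the old coordinate-count bookkeeping for explicit array and depth-compare nodes plus a list of trailing operands. An inferred sketch of its shape, reusing the IR's own types; the real declaration lives in the shader IR headers, which this diff does not show:

struct MetaTexture {
    const Sampler& sampler;
    Node array;                // array-index node, nullptr for non-array textures
    Node depth_compare;        // depth-reference node, nullptr for non-shadow samplers
    std::vector<Node> extras;  // trailing operands: lod, bias or gather component
    u32 element;               // destination component this operation produces
};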

View File

@@ -14,7 +14,7 @@ using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) {
u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);

View File

@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);

Some files were not shown because too many files have changed in this diff.