Compare commits

..

75 Commits

Author SHA1 Message Date
ReinUsesLisp
5ca63d0675 shader/decode: Remove extras from MetaTexture 2019-02-26 00:11:30 -03:00
ReinUsesLisp
48e6f77c03 shader/decode: Split memory and texture instructions decoding 2019-02-26 00:11:30 -03:00
bunnei
c3471bf618 Merge pull request #2156 from FreddyFunk/patch-1
file_sys/vfs_vector: Fix ignored offset on Write
2019-02-25 18:28:58 -05:00
bunnei
da1b45de34 Merge pull request #2158 from lioncash/table
service/vi: Update IManagerDisplayService's function table
2019-02-25 18:27:43 -05:00
bunnei
1cffd3848b Merge pull request #2160 from lioncash/audio-warn
audio_core: Resolve compilation warnings
2019-02-25 18:25:36 -05:00
bunnei
93c1630570 Merge pull request #2159 from lioncash/warn
shader/track: Resolve variable shadowing warnings
2019-02-25 13:26:00 -05:00
Lioncash
04d7b7e09d audio_core/cubeb_sink: Initialize CubebSinkStream's last_frame data member
Ensures that all member variables are initialized in a deterministic
manner across the board.
2019-02-25 09:40:37 -05:00
Lioncash
8250f9bb1c audio_core/cubeb_sink: Add override specifier to destructor
CubebSinkStream inherits from a base class with a virtual destructor, so
override can be appended to CubebSinkStream's destructor.
2019-02-25 09:38:27 -05:00
Lioncash
7cdeec20ec audio_core/cubeb_sink: Resolve variable shadowing warnings in SamplesInQueue
The name of the parameter was shadowing the member variable of the same
name. Instead, alter the name of the parameter to prevent said
shadowing.
2019-02-25 09:28:51 -05:00
Lioncash
a12f4efa2f audio_core/codec: Resolve truncation warnings within DecodeADPCM
The assignments here were performing an implicit truncation from int to
s16. Make it explicit that this is desired behavior.
2019-02-25 09:24:39 -05:00
Lioncash
c1b2e35625 shader/track: Resolve variable shadowing warnings 2019-02-25 09:10:59 -05:00
Lioncash
be7dad5e7e service/vi: Update IManagerDisplayService's function table
Amends it to add the 7.0.0+ CreateStrayLayer function.
2019-02-25 08:09:00 -05:00
bunnei
c07987dfab Merge pull request #2118 from FernandoS27/ipa-improve
shader_decompiler: Improve Accuracy of Attribute Interpolation.
2019-02-24 23:04:22 -05:00
bunnei
c4243c07cc Merge pull request #2119 from FernandoS27/fix-copy
rasterizer_cache_gl: Only do fast layered copy on the same format.
2019-02-24 23:03:52 -05:00
bunnei
c6170565b5 Merge pull request #2155 from FearlessTobi/port-4655
Port citra-emu/citra#4655: "Remove GCC version checks"
2019-02-24 23:03:13 -05:00
bunnei
57985fb16a Merge pull request #2144 from lioncash/factor
service/vi: Convert Display and Layer structs into classes
2019-02-24 23:02:50 -05:00
Frederic L
517933adcb file_sys/vfs_vector: Fix ignored offset on Write 2019-02-25 00:27:49 +01:00
tgsm
030814b1cb Remove GCC version checks
Citra can't be compiled using GCC <7 because of required C++17 support, so these version checks don't need to exist anymore.
2019-02-24 15:24:06 +01:00
bunnei
90c780e6f3 Merge pull request #2139 from degasus/dma_pusher
video_core/dma_pusher: The full list of headers at once.
2019-02-24 04:15:49 -05:00
bunnei
f7090bacc5 Merge pull request #2146 from ReinUsesLisp/vulkan-scheduler
vk_scheduler: Implement a scheduler
2019-02-23 23:32:43 -05:00
bunnei
d062991643 Merge pull request #2150 from ReinUsesLisp/fixup-layer-swizzle
gl_rasterizer_cache: Fixup parameter order in layered swizzle
2019-02-23 23:31:34 -05:00
bunnei
4ab978d670 Merge pull request #2151 from ReinUsesLisp/fixup-vk-memory-manager
vk_memory_manager: Fixup commit interval allocation
2019-02-23 23:29:53 -05:00
ReinUsesLisp
92050c4d86 vk_memory_manager: Fixup commit interval allocation
VKMemoryCommitImpl was using "begin + end" as the end of its interval.
That ended up wasting memory.
2019-02-24 01:04:41 -03:00
ReinUsesLisp
abef11a540 gl_rasterizer_cache: Fixup parameter order in layered swizzle 2019-02-23 23:27:30 -03:00
ReinUsesLisp
f546fb35ed vk_scheduler: Implement a scheduler
The scheduler abstracts command buffer and fence management with an
interface that's able to do OpenGL-like operations on Vulkan command
buffers.

It returns, by value, a command buffer and fence that have to be used for
subsequent operations until Flush or Finish is executed. After that, the
current execution context (the pair of command buffer and fence) is
invalidated and a new one must be fetched. Thankfully, validation layers will
quickly detect if this is skipped, throwing an error due to modifications to
an already-submitted command buffer.
2019-02-22 01:33:32 -03:00
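As an illustration of that flow, a minimal sketch with placeholder types and names (not the actual yuzu interface):

// Hypothetical sketch: fetch the current execution context, record work, and
// re-fetch after a Flush invalidates it.
struct CommandBufferHandle {};
struct FenceHandle {};

struct ExecutionContext {
    CommandBufferHandle cmdbuf;
    FenceHandle fence;
};

class Scheduler {
public:
    // Returned by value; only valid until the next Flush() or Finish().
    ExecutionContext GetExecutionContext() const {
        return {current_cmdbuf, current_fence};
    }

    void Flush() {
        // Submits the current command buffer and invalidates the context.
        AllocateNewContext();
    }

private:
    void AllocateNewContext() {
        current_cmdbuf = CommandBufferHandle{};
        current_fence = FenceHandle{};
    }

    CommandBufferHandle current_cmdbuf;
    FenceHandle current_fence;
};

void RecordSomeWork(Scheduler& scheduler) {
    auto exctx = scheduler.GetExecutionContext();
    // ... record commands into exctx.cmdbuf ...
    scheduler.Flush();
    // The old context is now stale; fetch a fresh one before recording again.
    exctx = scheduler.GetExecutionContext();
}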
bunnei
94b27bb8a5 Merge pull request #2138 from ReinUsesLisp/vulkan-memory-manager
vk_memory_manager: Implement memory manager
2019-02-21 22:26:54 -05:00
Lioncash
90528f1326 service/nvflinger: Store BufferQueue instances as regular data members
The NVFlinger service is already passed into services that need to
guarantee its lifetime, so the BufferQueue instances will already live
as long as they're needed. Making them std::shared_ptr instances in this
case is unnecessary.
2019-02-21 22:09:46 -05:00
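A small sketch of the ownership change being described, with simplified class names:

#include <memory>
#include <vector>

class BufferQueue {};

// Before: shared ownership that nothing else actually required.
class NVFlingerBefore {
    std::vector<std::shared_ptr<BufferQueue>> buffer_queues;
};

// After: plain data members; their lifetime is tied to the owning service,
// which already outlives every consumer of the queues.
class NVFlingerAfter {
    std::vector<BufferQueue> buffer_queues;
};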
Lioncash
fd15730767 service/vi/vi_layer: Convert Layer struct into a class
Like the previous changes made to the Display struct, this prepares the
Layer struct for changes to its interface. Given Layer will be given
more invariants in the future, we convert it into a class to better
signify that.
2019-02-21 12:13:09 -05:00
Lioncash
fa4dc2cf42 service/nvflinger: Move display specifics over to vi_display
With the display and layer structures relocated to the vi service, we
can begin giving these a proper interface before beginning to properly
support the display types.

This converts the display struct into a class and provides it with the
necessary functions to preserve behavior within the NVFlinger class.
2019-02-21 12:13:04 -05:00
bunnei
9539c4203b Merge pull request #2125 from ReinUsesLisp/fixup-glstate
gl_state: Synchronize gl_state even when state is disabled
2019-02-20 21:47:46 -05:00
bunnei
ae437320c8 Merge pull request #2130 from lioncash/system_engine
video_core: Remove usages of System::GetInstance() within the engines
2019-02-20 21:24:56 -05:00
Jungy
3273f93cd5 Fixes Unicode Key File Directories (#2120)
* Fixes Unicode Key File Directories

Adds code so that when loading a file, the path is converted to UTF-16 first,
to ensure the files can be opened. Code borrowed from FileUtil::Exists.

* Update src/core/crypto/key_manager.cpp

Co-Authored-By: Jungorend <Jungorend@users.noreply.github.com>

* Update src/core/crypto/key_manager.cpp

Co-Authored-By: Jungorend <Jungorend@users.noreply.github.com>

* Using FileUtil instead to be cleaner.

* Update src/core/crypto/key_manager.cpp

Co-Authored-By: Jungorend <Jungorend@users.noreply.github.com>
2019-02-20 21:24:25 -05:00
bunnei
ef559f5741 Merge pull request #2142 from lioncash/relocate
service/nvflinger: Relocate definitions of Layer and Display to the vi service
2019-02-20 21:21:55 -05:00
Lioncash
8d5d369b54 service/nvflinger: Relocate definitions of Layer and Display to the vi service
These are more closely related to the vi service as opposed to the
intermediary nvflinger.

This also places them in their relevant subfolder, as future changes to
these will likely result in subclassing to represent various displays
and services, as they're done within the service itself on hardware.

The reasoning for prefixing the display and layer source files is to
avoid potential clashing if two files with the same name are compiled
(e.g. if 'display.cpp/.h' or 'layer.cpp/.h' is added to another service
at any point), which MSVC will actually warn against. This prevents that
case from occurring.

This also presently converts the std::array introduced within
f45c25aaba back to a std::vector to allow
the forward declaration of the Display type. Forward declaring a type
within a std::vector is allowed since the introduction of N4510
(http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4510.html) by
Zhihao Yuan.
2019-02-19 18:27:16 -05:00
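As a small illustration of the N4510 point: std::vector (unlike std::array) may be instantiated with an incomplete element type, provided the type is complete wherever the vector is actually used. A simplified sketch:

#include <vector>

namespace Service::VI {
class Display;  // forward declaration only; the full definition lives in vi_display.h
}

class NVFlinger {
public:
    // Declared here, defined in the .cpp where Display is a complete type,
    // so the vector's destructor is instantiated with the full definition.
    ~NVFlinger();

private:
    // Allowed since N4510: std::vector may be declared with an incomplete
    // element type. std::array would need sizeof(Display) right here.
    std::vector<Service::VI::Display> displays;
};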
Markus Wick
6dd40976d0 video_core/dma_pusher: Simplify Step() logic.
As fetching the command list header and the list of command headers is now a fixed 1:1 relation, they can be implemented within a single call.
This cleans up the Step() logic quite a bit.
2019-02-19 10:28:42 +01:00
Markus Wick
717394c980 video_core/dma_pusher: The full list of headers at once.
Fetching every u32 from memory leads to a big overhead. So let's fetch all of them as a block if possible.
This reduces the Memory::* calls by the dma_pusher by a factor of 10.
2019-02-19 09:58:38 +01:00
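A rough sketch of the batching idea, with an illustrative stand-in for the block-read helper rather than the exact yuzu API:

#include <cstddef>
#include <cstdint>
#include <vector>

using u32 = std::uint32_t;
using GPUVAddr = std::uint64_t;

// Stand-in for a block copy out of guest memory; yuzu's Memory namespace
// exposes a similar block-read facility.
void ReadGuestBlock(GPUVAddr gpu_addr, void* dest, std::size_t size);

struct CommandHeader {
    u32 raw;
};

// Before this change, each header was fetched with its own guest-memory read.
// Fetching the whole command list as one block removes that per-word overhead.
std::vector<CommandHeader> FetchCommandHeaders(GPUVAddr list_start, std::size_t count) {
    std::vector<CommandHeader> headers(count);
    ReadGuestBlock(list_start, headers.data(), count * sizeof(CommandHeader));
    return headers;
}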
ReinUsesLisp
b675c97cdd vk_memory_manager: Implement memory manager
A memory manager object handles the memory allocations for a device. It
allocates large chunks of Vulkan memory and then suballocates from them.
2019-02-19 03:42:28 -03:00
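A simplified, conceptual sketch of the chunk-then-suballocate approach (a plain bump allocator, not the actual VKMemoryManager):

#include <cstdint>
#include <optional>
#include <vector>

// One large device-memory allocation that is carved into smaller commits.
class MemoryChunk {
public:
    explicit MemoryChunk(std::uint64_t size) : size{size} {}

    // Returns the offset of a suballocation, or nothing if the chunk is full.
    // alignment is assumed to be a nonzero power of two.
    std::optional<std::uint64_t> Commit(std::uint64_t bytes, std::uint64_t alignment) {
        const std::uint64_t offset = (cursor + alignment - 1) / alignment * alignment;
        if (offset + bytes > size) {
            return std::nullopt;
        }
        cursor = offset + bytes;
        return offset;
    }

private:
    std::uint64_t size;
    std::uint64_t cursor = 0;
};

class MemoryManager {
public:
    // Tries existing chunks first; allocates a new chunk when all are full.
    // Assumes bytes <= chunk_size.
    std::uint64_t Commit(std::uint64_t bytes, std::uint64_t alignment) {
        for (auto& chunk : chunks) {
            if (const auto offset = chunk.Commit(bytes, alignment)) {
                return *offset;
            }
        }
        chunks.emplace_back(chunk_size);
        return *chunks.back().Commit(bytes, alignment);
    }

private:
    static constexpr std::uint64_t chunk_size = 64 * 1024 * 1024;
    std::vector<MemoryChunk> chunks;
};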
bunnei
4bce08d497 Merge pull request #2122 from ReinUsesLisp/vulkan-resource-manager
vk_resource_manager: Implement fence and command buffer allocator
2019-02-18 21:05:28 -05:00
bunnei
2bb02a0b78 Merge pull request #2134 from lioncash/naming
audio_core/buffer: Make const and non-const getter for samples consistent
2019-02-17 11:26:33 -05:00
bunnei
e869c5ef1a Merge pull request #2133 from lioncash/arbiter
address_arbiter: Use nested namespaces where applicable
2019-02-16 15:37:21 -05:00
bunnei
4699fdca8f Merge pull request #2127 from FearlessTobi/fix-screenshot-srgb
renderer_opengl: respect the sRGB colorspace for the screenshot feature
2019-02-16 15:36:00 -05:00
bunnei
cd7e1183e2 Merge pull request #2128 from FearlessTobi/port-4197
Port citra-emu/citra#4197: "threadsafe_queue: Add PopWait and use it where possible "
2019-02-16 15:34:49 -05:00
Lioncash
b009bda67a audio_core/buffer: Make const and non-const getter for samples consistent
This way proper const/non-const selection can occur.
2019-02-16 15:21:35 -05:00
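The consistency in question is the usual const/non-const overload pair, so overload resolution can pick the right accessor based on the constness of the Buffer. A minimal sketch:

#include <cstdint>
#include <vector>

class Buffer {
public:
    // Non-const overload: callers with a mutable Buffer may modify the samples.
    std::vector<std::int16_t>& GetSamples() {
        return samples;
    }

    // Const overload: selected automatically for const Buffer references.
    const std::vector<std::int16_t>& GetSamples() const {
        return samples;
    }

private:
    std::vector<std::int16_t> samples;
};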
Lioncash
0113c36300 address_arbiter: Use nested namespaces where applicable
A fairly trivial change. Other sections of the codebase use nested
namespaces instead of separate namespaces here. This one must have just
been overlooked.
2019-02-16 12:41:30 -05:00
Lioncash
a8fa5019b5 video_core: Remove usages of System::GetInstance() within the engines
Avoids the use of the global accessor in favor of explicitly making the
system a dependency within the interface.
2019-02-15 22:06:23 -05:00
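A sketch of the pattern: the engine takes Core::System by reference in its constructor instead of reaching for the global accessor, making the dependency explicit. Class names are simplified:

namespace Core {
class System {};
}

// Before (hidden inside member functions):
//     auto& system = Core::System::GetInstance();
//
// After: the dependency is part of the interface.
class MaxwellEngine {
public:
    explicit MaxwellEngine(Core::System& system) : system{system} {}

private:
    Core::System& system;
};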
James Rowe
99da6362c4 Merge pull request #2123 from lioncash/coretiming-global
core_timing: De-globalize core_timing facilities
2019-02-15 19:52:11 -07:00
Lioncash
bd983414f6 core_timing: Convert core timing into a class
Gets rid of the largest set of mutable global state within the core.
This also paves a way for eliminating usages of GetInstance() on the
System class as a follow-up.

Note that no behavioral changes have been made, and this simply extracts
the functionality into a class. This also has the benefit of making
dependencies on the core timing functionality explicit within the
relevant interfaces.
2019-02-15 21:50:25 -05:00
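A rough outline of the de-globalization: the former file-scope statics become members of a CoreTiming class that System owns, and callers receive a reference instead of touching globals. Simplified sketch:

#include <cstdint>

class CoreTiming {
public:
    void AddTicks(std::uint64_t ticks) {
        downcount -= static_cast<int>(ticks);
    }
    int GetDowncount() const {
        return downcount;
    }

private:
    int downcount = 0;  // previously a file-scope static in core_timing.cpp
};

class System {
public:
    CoreTiming& GetCoreTiming() {
        return core_timing;
    }

private:
    CoreTiming core_timing;  // lifetime tied to the System instance
};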
B3n30
2195f10d15 Addressed review comments 2019-02-15 22:14:54 +01:00
B3n30
4154936568 threadsafe_queue: Add WaitIfEmpty and use it in logging 2019-02-15 22:12:54 +01:00
fearlessTobi
9a56b99fa4 renderer_opengl: respect the sRGB colorspace for the screenshot feature
Previously, screenshots completely ignored whether the game uses RGB or sRGB.
This resulted in screenshot colors that looked off for some titles.
2019-02-15 21:27:29 +01:00
ReinUsesLisp
8dfc81239f gl_state: Synchronize gl_state even when state is disabled
There are some potential edge cases where gl_state may fail to track the
state if a related state changes while its toggle is disabled, or if the
toggle itself did not change. This addresses that.
2019-02-15 01:30:14 -03:00
bunnei
4327f430f1 Merge pull request #2112 from lioncash/shadowing
gl_rasterizer_cache: Get rid of variable shadowing
2019-02-14 21:45:20 -05:00
bunnei
a8fc5d6edd Merge pull request #2111 from ReinUsesLisp/fetch-fix
gl_shader_decompiler: Re-implement TLDS lod
2019-02-14 21:42:34 -05:00
ReinUsesLisp
ae6c052ed9 vk_resource_manager: Implement a command buffer pool with VKFencedPool 2019-02-14 18:44:26 -03:00
ReinUsesLisp
a2b6de7e9f vk_resource_manager: Add VKFencedPool interface
Handles a pool of resources protected by fences and manages resource
overflow by allocating more resources.

This class is intended to be used through inheritance.
2019-02-14 18:44:26 -03:00
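A rough sketch of a fenced pool used through inheritance: the base class hands out entry indices and grows the pool when every entry is in flight, while a derived class allocates the actual resources in a virtual hook. Names and the bookkeeping are illustrative:

#include <cstddef>
#include <vector>

class FencedPool {
public:
    virtual ~FencedPool() = default;

protected:
    // Returns the index of a free entry, allocating more when all are in use.
    std::size_t CommitResource() {
        for (std::size_t i = 0; i < in_use.size(); ++i) {
            if (!in_use[i]) {
                in_use[i] = true;
                return i;
            }
        }
        const std::size_t begin = in_use.size();
        const std::size_t grow_by = begin == 0 ? 4 : begin;  // double on overflow
        in_use.resize(begin + grow_by, false);
        Allocate(begin, begin + grow_by);  // derived class creates the resources
        in_use[begin] = true;
        return begin;
    }

    void Release(std::size_t index) {
        in_use[index] = false;
    }

    // Hook for derived classes (e.g. a command buffer pool) to create entries.
    virtual void Allocate(std::size_t begin, std::size_t end) = 0;

private:
    std::vector<bool> in_use;
};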
ReinUsesLisp
0ffdd0a683 vk_resource_manager: Implement VKResourceManager and fence allocator
CommitFence iterates the pool of fences until a free one is found. If all
fences are in use at the same time, more are allocated.
2019-02-14 18:44:26 -03:00
ReinUsesLisp
aa0b6babda vk_resource_manager: Implement VKFenceWatch
A fence watch is used to keep track of the usage of a fence and protect
a resource or set of resources without having to inherit from their
handlers.
2019-02-14 18:44:26 -03:00
ReinUsesLisp
25c2fe1c6b vk_resource_manager: Implement VKFence
Fences take ownership of objects, protecting them from GPU-side or
driver-side concurrent access. They must be committed from the resource
manager. Their usage flow is: commit the fence from the resource
manager, protect resources with it while using them, send the fence to an
execution queue, Wait for it if needed, and then call Release. Used
resources will automatically be signaled when they are free to be
reused.
2019-02-14 18:44:26 -03:00
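A sketch of the described usage flow (commit, protect, queue, wait, release); the types below are placeholders, not the real Vulkan handles:

struct Resource {
    // In the real code a fence "signals" the resources it protected once the
    // GPU work is known to be finished, marking them safe to reuse.
    bool free = true;
};

class Fence {
public:
    void Protect(Resource& resource) {
        resource.free = false;
        protected_resource = &resource;
    }

    void Wait() { /* block until the GPU signals the fence */ }

    void Release() {
        if (protected_resource != nullptr) {
            protected_resource->free = true;  // resource may now be reused
            protected_resource = nullptr;
        }
    }

private:
    Resource* protected_resource = nullptr;
};

void UseResource(Fence& fence, Resource& resource) {
    fence.Protect(resource);   // 1. commit the fence and protect the resource
    // ... record and submit GPU work that uses the resource ...
    fence.Wait();              // 2. wait for the queued work if needed
    fence.Release();           // 3. release; protected resources are signaled free
}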
ReinUsesLisp
33a4cebc22 vk_resource_manager: Add VKResource interface
VKResource is an interface that gets signaled by a fence when it is free
to be reused.
2019-02-14 18:36:15 -03:00
bunnei
fcc3aa0bbf Merge pull request #2113 from ReinUsesLisp/vulkan-base
vulkan: Add dependencies and device abstraction
2019-02-14 10:06:48 -05:00
Fernando Sahmkow
10682ad7e0 shader_decompiler: Improve Accuracy of Attribute Interpolation. 2019-02-14 03:25:07 -04:00
bunnei
8490e7746a Merge pull request #2115 from lioncash/local
core_timing: Make EmptyTimedCallback a local variable
2019-02-13 21:42:04 -05:00
bunnei
f0c4ac9abd Merge pull request #2116 from lioncash/size
threadsafe_queue: Remove NeedSize template parameter
2019-02-13 21:41:25 -05:00
Fernando Sahmkow
bb41683394 rasterizer_cache_gl: Only do fast layered copy on the same format, as
glCopyImageSubData does not support different formats.
2019-02-13 16:55:00 -04:00
Lioncash
0829ef97ca threadsafe_queue: Use std::size_t for representing size
Makes it consistent with the regular standard containers in terms of
size representation. This also gets rid of dependence on our own
type aliases, removing the need for an include.
2019-02-12 22:39:53 -05:00
Lioncash
f0bfb24c61 threadsafe_queue: Remove NeedSize template parameter
The necessity of this parameter is dubious at best, and in 2019 probably
offers completely negligible savings as opposed to just leaving this
enabled. This removes it and simplifies the overall interface.
2019-02-12 22:09:51 -05:00
Lioncash
83ba3515ec core_timing: Make EmptyTimedCallback a local variable
Given this is only used in one place, it can be moved closest to its
usage site.
2019-02-12 21:47:18 -05:00
ReinUsesLisp
8beca060d1 vk_device: Abstract device handling into a class
VKDevice contains all the data required to manage and initialize a
physical device. It is intended to be passed to other Vulkan objects so they
can query device-specific data (for example, the logical device and the
dispatch loader).
2019-02-12 21:43:02 -03:00
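A sketch of the abstraction: one object owns the device-specific state and is passed by reference to other Vulkan objects, which query it through accessors. The member types here are stand-ins:

// Placeholder handle types standing in for vk::PhysicalDevice, vk::Device, etc.
struct PhysicalDeviceHandle {};
struct LogicalDeviceHandle {};
struct DispatchLoader {};

class VKDevice {
public:
    VKDevice(PhysicalDeviceHandle physical, LogicalDeviceHandle logical, DispatchLoader dld)
        : physical{physical}, logical{logical}, dld{dld} {}

    // Other Vulkan objects query device-specific data through accessors
    // instead of each holding their own copies of the handles.
    const LogicalDeviceHandle& GetLogical() const { return logical; }
    const DispatchLoader& GetDispatchLoader() const { return dld; }

private:
    PhysicalDeviceHandle physical;
    LogicalDeviceHandle logical;
    DispatchLoader dld;
};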
Lioncash
054e39647c gl_rasterizer_cache: Remove unnecessary newline 2019-02-12 16:56:19 -05:00
Lioncash
e25c464c02 gl_rasterizer_cache: Get rid of variable shadowing
Avoids shadowing the members of the struct itself, which results in a
-Wshadow warning.
2019-02-12 16:46:39 -05:00
ReinUsesLisp
18fe910957 renderer_vulkan: Add declarations file
This file is intended to be included instead of vulkan/vulkan.hpp. It
contains declarations of unique handles that use a dynamic dispatcher
instead of a static one (which would require linking to a Vulkan
library).
2019-02-12 18:33:02 -03:00
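This roughly corresponds to the Vulkan-Hpp pattern sketched below: unique-handle aliases are declared with vk::DispatchLoaderDynamic instead of the default static dispatcher, so no Vulkan library has to be linked at build time. Treat it as an approximation of such a header, not the exact yuzu file:

// Sketch of a declarations header built around dynamic dispatch. Requires the
// Vulkan-Headers submodule added in this change.
#define VK_NO_PROTOTYPES
#include <vulkan/vulkan.hpp>

namespace Vulkan {

// Unique-handle aliases that carry the dynamic dispatcher, so object destruction
// goes through function pointers loaded at runtime instead of link-time symbols.
template <typename T>
using UniqueHandle = vk::UniqueHandle<T, vk::DispatchLoaderDynamic>;

using UniqueDevice = UniqueHandle<vk::Device>;
using UniqueFence = UniqueHandle<vk::Fence>;
using UniqueSemaphore = UniqueHandle<vk::Semaphore>;

} // namespace Vulkan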
ReinUsesLisp
b12ab4d805 logging: Add Vulkan backend logging class type 2019-02-12 18:33:02 -03:00
ReinUsesLisp
cc94a6d101 cmake: Add Vulkan option 2019-02-12 18:33:02 -03:00
ReinUsesLisp
afb8af9853 gitmodules: Add Vulkan headers dependency 2019-02-12 18:33:02 -03:00
ReinUsesLisp
e60d4d70bc gl_shader_decompiler: Re-implement TLDS lod 2019-02-12 17:03:07 -03:00
107 changed files with 3121 additions and 1294 deletions

3
.gitmodules vendored
View File

@@ -37,3 +37,6 @@
[submodule "discord-rpc"]
path = externals/discord-rpc
url = https://github.com/discordapp/discord-rpc.git
[submodule "Vulkan-Headers"]
path = externals/Vulkan-Headers
url = https://github.com/KhronosGroup/Vulkan-Headers.git

View File

@@ -23,6 +23,8 @@ option(YUZU_USE_QT_WEB_ENGINE "Use QtWebEngine for web applet implementation" OF
option(ENABLE_CUBEB "Enables the cubeb audio backend" ON)
option(ENABLE_VULKAN "Enables Vulkan backend" ON)
option(USE_DISCORD_PRESENCE "Enables Discord Rich Presence" OFF)
if(NOT EXISTS ${PROJECT_SOURCE_DIR}/.git/hooks/pre-commit)

View File

@@ -73,6 +73,7 @@ set(HASH_FILES
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/memory.cpp"
"${VIDEO_CORE}/shader/decode/texture.cpp"
"${VIDEO_CORE}/shader/decode/other.cpp"
"${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"

1
externals/Vulkan-Headers vendored Submodule

View File

@@ -26,14 +26,15 @@ static Stream::Format ChannelsToStreamFormat(u32 num_channels) {
return {};
}
StreamPtr AudioOut::OpenStream(u32 sample_rate, u32 num_channels, std::string&& name,
StreamPtr AudioOut::OpenStream(Core::Timing::CoreTiming& core_timing, u32 sample_rate,
u32 num_channels, std::string&& name,
Stream::ReleaseCallback&& release_callback) {
if (!sink) {
sink = CreateSinkFromID(Settings::values.sink_id, Settings::values.audio_device_id);
}
return std::make_shared<Stream>(
sample_rate, ChannelsToStreamFormat(num_channels), std::move(release_callback),
core_timing, sample_rate, ChannelsToStreamFormat(num_channels), std::move(release_callback),
sink->AcquireSinkStream(sample_rate, num_channels, name), std::move(name));
}

View File

@@ -13,6 +13,10 @@
#include "audio_core/stream.h"
#include "common/common_types.h"
namespace Core::Timing {
class CoreTiming;
}
namespace AudioCore {
/**
@@ -21,8 +25,8 @@ namespace AudioCore {
class AudioOut {
public:
/// Opens a new audio stream
StreamPtr OpenStream(u32 sample_rate, u32 num_channels, std::string&& name,
Stream::ReleaseCallback&& release_callback);
StreamPtr OpenStream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, u32 num_channels,
std::string&& name, Stream::ReleaseCallback&& release_callback);
/// Returns a vector of recently released buffers specified by tag for the specified stream
std::vector<Buffer::Tag> GetTagsAndReleaseBuffers(StreamPtr stream, std::size_t max_count);

View File

@@ -8,6 +8,7 @@
#include "audio_core/codec.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/hle/kernel/writable_event.h"
#include "core/memory.h"
@@ -71,14 +72,14 @@ private:
EffectOutStatus out_status{};
EffectInStatus info{};
};
AudioRenderer::AudioRenderer(AudioRendererParameter params,
AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
Kernel::SharedPtr<Kernel::WritableEvent> buffer_event)
: worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count),
effects(params.effect_count) {
audio_out = std::make_unique<AudioCore::AudioOut>();
stream = audio_out->OpenStream(STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS, "AudioRenderer",
[=]() { buffer_event->Signal(); });
stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS,
"AudioRenderer", [=]() { buffer_event->Signal(); });
audio_out->StartStream(stream);
QueueMixedBuffer(0);

View File

@@ -14,6 +14,10 @@
#include "common/swap.h"
#include "core/hle/kernel/object.h"
namespace Core::Timing {
class CoreTiming;
}
namespace Kernel {
class WritableEvent;
}
@@ -208,7 +212,7 @@ static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size
class AudioRenderer {
public:
AudioRenderer(AudioRendererParameter params,
AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
Kernel::SharedPtr<Kernel::WritableEvent> buffer_event);
~AudioRenderer();

View File

@@ -21,7 +21,7 @@ public:
Buffer(Tag tag, std::vector<s16>&& samples) : tag{tag}, samples{std::move(samples)} {}
/// Returns the raw audio data for the buffer
std::vector<s16>& Samples() {
std::vector<s16>& GetSamples() {
return samples;
}

View File

@@ -68,8 +68,8 @@ std::vector<s16> DecodeADPCM(const u8* const data, std::size_t size, const ADPCM
}
}
state.yn1 = yn1;
state.yn2 = yn2;
state.yn1 = static_cast<s16>(yn1);
state.yn2 = static_cast<s16>(yn2);
return ret;
}

View File

@@ -46,7 +46,7 @@ public:
}
}
~CubebSinkStream() {
~CubebSinkStream() override {
if (!ctx) {
return;
}
@@ -75,11 +75,11 @@ public:
queue.Push(samples);
}
std::size_t SamplesInQueue(u32 num_channels) const override {
std::size_t SamplesInQueue(u32 channel_count) const override {
if (!ctx)
return 0;
return queue.Size() / num_channels;
return queue.Size() / channel_count;
}
void Flush() override {
@@ -98,7 +98,7 @@ private:
u32 num_channels{};
Common::RingBuffer<s16, 0x10000> queue;
std::array<s16, 2> last_frame;
std::array<s16, 2> last_frame{};
std::atomic<bool> should_flush{};
TimeStretcher time_stretch;

View File

@@ -32,12 +32,12 @@ u32 Stream::GetNumChannels() const {
return {};
}
Stream::Stream(u32 sample_rate, Format format, ReleaseCallback&& release_callback,
SinkStream& sink_stream, std::string&& name_)
Stream::Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format format,
ReleaseCallback&& release_callback, SinkStream& sink_stream, std::string&& name_)
: sample_rate{sample_rate}, format{format}, release_callback{std::move(release_callback)},
sink_stream{sink_stream}, name{std::move(name_)} {
sink_stream{sink_stream}, core_timing{core_timing}, name{std::move(name_)} {
release_event = Core::Timing::RegisterEvent(
release_event = core_timing.RegisterEvent(
name, [this](u64 userdata, int cycles_late) { ReleaseActiveBuffer(); });
}
@@ -95,12 +95,11 @@ void Stream::PlayNextBuffer() {
active_buffer = queued_buffers.front();
queued_buffers.pop();
VolumeAdjustSamples(active_buffer->Samples());
VolumeAdjustSamples(active_buffer->GetSamples());
sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
Core::Timing::ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event,
{});
core_timing.ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {});
}
void Stream::ReleaseActiveBuffer() {

View File

@@ -14,8 +14,9 @@
#include "common/common_types.h"
namespace Core::Timing {
class CoreTiming;
struct EventType;
}
} // namespace Core::Timing
namespace AudioCore {
@@ -42,8 +43,8 @@ public:
/// Callback function type, used to change guest state on a buffer being released
using ReleaseCallback = std::function<void()>;
Stream(u32 sample_rate, Format format, ReleaseCallback&& release_callback,
SinkStream& sink_stream, std::string&& name_);
Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format format,
ReleaseCallback&& release_callback, SinkStream& sink_stream, std::string&& name_);
/// Plays the audio stream
void Play();
@@ -100,6 +101,7 @@ private:
std::queue<BufferPtr> queued_buffers; ///< Buffers queued to be played in the stream
std::queue<BufferPtr> released_buffers; ///< Buffers recently released from the stream
SinkStream& sink_stream; ///< Output sink for the stream
Core::Timing::CoreTiming& core_timing; ///< Core timing instance.
std::string name; ///< Name of the stream, must be unique
};

View File

@@ -47,6 +47,7 @@ add_custom_command(OUTPUT scm_rev.cpp
"${VIDEO_CORE}/shader/decode/integer_set.cpp"
"${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/memory.cpp"
"${VIDEO_CORE}/shader/decode/texture.cpp"
"${VIDEO_CORE}/shader/decode/other.cpp"
"${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
"${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"

View File

@@ -40,9 +40,7 @@ public:
const Impl& operator=(Impl const&) = delete;
void PushEntry(Entry e) {
std::lock_guard<std::mutex> lock(message_mutex);
message_queue.Push(std::move(e));
message_cv.notify_one();
}
void AddBackend(std::unique_ptr<Backend> backend) {
@@ -86,15 +84,13 @@ private:
}
};
while (true) {
{
std::unique_lock<std::mutex> lock(message_mutex);
message_cv.wait(lock, [&] { return !running || message_queue.Pop(entry); });
}
if (!running) {
entry = message_queue.PopWait();
if (entry.final_entry) {
break;
}
write_logs(entry);
}
// Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a case
// where a system is repeatedly spamming logs even on close.
const int MAX_LOGS_TO_WRITE = filter.IsDebug() ? INT_MAX : 100;
@@ -106,14 +102,13 @@ private:
}
~Impl() {
running = false;
message_cv.notify_one();
Entry entry;
entry.final_entry = true;
message_queue.Push(entry);
backend_thread.join();
}
std::atomic_bool running{true};
std::mutex message_mutex, writing_mutex;
std::condition_variable message_cv;
std::mutex writing_mutex;
std::thread backend_thread;
std::vector<std::unique_ptr<Backend>> backends;
Common::MPSCQueue<Log::Entry> message_queue;
@@ -232,6 +227,7 @@ void DebuggerBackend::Write(const Entry& entry) {
CLS(Render) \
SUB(Render, Software) \
SUB(Render, OpenGL) \
SUB(Render, Vulkan) \
CLS(Audio) \
SUB(Audio, DSP) \
SUB(Audio, Sink) \

View File

@@ -27,6 +27,7 @@ struct Entry {
unsigned int line_num;
std::string function;
std::string message;
bool final_entry = false;
Entry() = default;
Entry(Entry&& o) = default;

View File

@@ -112,6 +112,7 @@ enum class Class : ClassType {
Render, ///< Emulator video output and hardware acceleration
Render_Software, ///< Software renderer backend
Render_OpenGL, ///< OpenGL backend
Render_Vulkan, ///< Vulkan backend
Audio, ///< Audio emulation
Audio_DSP, ///< The HLE implementation of the DSP
Audio_Sink, ///< Emulator audio output backend

View File

@@ -28,8 +28,8 @@
#include <cstring>
#include "common/common_types.h"
// GCC 4.6+
#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
// GCC
#ifdef __GNUC__
#if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN)
#define COMMON_LITTLE_ENDIAN 1
@@ -38,7 +38,7 @@
#endif
// LLVM/clang
#elif __clang__
#elif defined(__clang__)
#if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN)
#define COMMON_LITTLE_ENDIAN 1

View File

@@ -7,17 +7,17 @@
// a simple lockless thread-safe,
// single reader, single writer queue
#include <algorithm>
#include <atomic>
#include <condition_variable>
#include <cstddef>
#include <mutex>
#include "common/common_types.h"
#include <utility>
namespace Common {
template <typename T, bool NeedSize = true>
template <typename T>
class SPSCQueue {
public:
SPSCQueue() : size(0) {
SPSCQueue() {
write_ptr = read_ptr = new ElementPtr();
}
~SPSCQueue() {
@@ -25,13 +25,12 @@ public:
delete read_ptr;
}
u32 Size() const {
static_assert(NeedSize, "using Size() on FifoQueue without NeedSize");
std::size_t Size() const {
return size.load();
}
bool Empty() const {
return !read_ptr->next.load();
return Size() == 0;
}
T& Front() const {
@@ -47,13 +46,14 @@ public:
ElementPtr* new_ptr = new ElementPtr();
write_ptr->next.store(new_ptr, std::memory_order_release);
write_ptr = new_ptr;
if (NeedSize)
size++;
cv.notify_one();
++size;
}
void Pop() {
if (NeedSize)
size--;
--size;
ElementPtr* tmpptr = read_ptr;
// advance the read pointer
read_ptr = tmpptr->next.load();
@@ -66,8 +66,7 @@ public:
if (Empty())
return false;
if (NeedSize)
size--;
--size;
ElementPtr* tmpptr = read_ptr;
read_ptr = tmpptr->next.load(std::memory_order_acquire);
@@ -77,6 +76,16 @@ public:
return true;
}
T PopWait() {
if (Empty()) {
std::unique_lock<std::mutex> lock(cv_mutex);
cv.wait(lock, [this]() { return !Empty(); });
}
T t;
Pop(t);
return t;
}
// not thread-safe
void Clear() {
size.store(0);
@@ -89,7 +98,7 @@ private:
// and a pointer to the next ElementPtr
class ElementPtr {
public:
ElementPtr() : next(nullptr) {}
ElementPtr() {}
~ElementPtr() {
ElementPtr* next_ptr = next.load();
@@ -98,21 +107,23 @@ private:
}
T current;
std::atomic<ElementPtr*> next;
std::atomic<ElementPtr*> next{nullptr};
};
ElementPtr* write_ptr;
ElementPtr* read_ptr;
std::atomic<u32> size;
std::atomic_size_t size{0};
std::mutex cv_mutex;
std::condition_variable cv;
};
// a simple thread-safe,
// single reader, multiple writer queue
template <typename T, bool NeedSize = true>
template <typename T>
class MPSCQueue {
public:
u32 Size() const {
std::size_t Size() const {
return spsc_queue.Size();
}
@@ -138,13 +149,17 @@ public:
return spsc_queue.Pop(t);
}
T PopWait() {
return spsc_queue.PopWait();
}
// not thread-safe
void Clear() {
spsc_queue.Clear();
}
private:
SPSCQueue<T, NeedSize> spsc_queue;
SPSCQueue<T> spsc_queue;
std::mutex write_lock;
};
} // namespace Common

View File

@@ -400,6 +400,10 @@ add_library(core STATIC
hle/service/time/time.h
hle/service/usb/usb.cpp
hle/service/usb/usb.h
hle/service/vi/display/vi_display.cpp
hle/service/vi/display/vi_display.h
hle/service/vi/layer/vi_layer.cpp
hle/service/vi/layer/vi_layer.h
hle/service/vi/vi.cpp
hle/service/vi/vi.h
hle/service/vi/vi_m.cpp

View File

@@ -112,14 +112,14 @@ public:
// Always execute at least one tick.
amortized_ticks = std::max<u64>(amortized_ticks, 1);
Timing::AddTicks(amortized_ticks);
parent.core_timing.AddTicks(amortized_ticks);
num_interpreted_instructions = 0;
}
u64 GetTicksRemaining() override {
return std::max(Timing::GetDowncount(), 0);
return std::max(parent.core_timing.GetDowncount(), 0);
}
u64 GetCNTPCT() override {
return Timing::GetTicks();
return parent.core_timing.GetTicks();
}
ARM_Dynarmic& parent;
@@ -172,8 +172,10 @@ void ARM_Dynarmic::Step() {
cb->InterpreterFallback(jit->GetPC(), 1);
}
ARM_Dynarmic::ARM_Dynarmic(ExclusiveMonitor& exclusive_monitor, std::size_t core_index)
: cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), core_index{core_index},
ARM_Dynarmic::ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
std::size_t core_index)
: cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{core_timing},
core_index{core_index}, core_timing{core_timing},
exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {
ThreadContext ctx{};
inner_unicorn.SaveContext(ctx);

View File

@@ -16,6 +16,10 @@ namespace Memory {
struct PageTable;
}
namespace Core::Timing {
class CoreTiming;
}
namespace Core {
class ARM_Dynarmic_Callbacks;
@@ -23,7 +27,8 @@ class DynarmicExclusiveMonitor;
class ARM_Dynarmic final : public ARM_Interface {
public:
ARM_Dynarmic(ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
ARM_Dynarmic(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
std::size_t core_index);
~ARM_Dynarmic();
void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
@@ -62,6 +67,7 @@ private:
ARM_Unicorn inner_unicorn;
std::size_t core_index;
Timing::CoreTiming& core_timing;
DynarmicExclusiveMonitor& exclusive_monitor;
Memory::PageTable* current_page_table = nullptr;

View File

@@ -72,7 +72,7 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
return {};
}
ARM_Unicorn::ARM_Unicorn() {
ARM_Unicorn::ARM_Unicorn(Timing::CoreTiming& core_timing) : core_timing{core_timing} {
CHECKED(uc_open(UC_ARCH_ARM64, UC_MODE_ARM, &uc));
auto fpv = 3 << 20;
@@ -177,7 +177,7 @@ void ARM_Unicorn::Run() {
if (GDBStub::IsServerEnabled()) {
ExecuteInstructions(std::max(4000000, 0));
} else {
ExecuteInstructions(std::max(Timing::GetDowncount(), 0));
ExecuteInstructions(std::max(core_timing.GetDowncount(), 0));
}
}
@@ -190,7 +190,7 @@ MICROPROFILE_DEFINE(ARM_Jit_Unicorn, "ARM JIT", "Unicorn", MP_RGB(255, 64, 64));
void ARM_Unicorn::ExecuteInstructions(int num_instructions) {
MICROPROFILE_SCOPE(ARM_Jit_Unicorn);
CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
Timing::AddTicks(num_instructions);
core_timing.AddTicks(num_instructions);
if (GDBStub::IsServerEnabled()) {
if (last_bkpt_hit) {
uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);

View File

@@ -9,12 +9,17 @@
#include "core/arm/arm_interface.h"
#include "core/gdbstub/gdbstub.h"
namespace Core::Timing {
class CoreTiming;
}
namespace Core {
class ARM_Unicorn final : public ARM_Interface {
public:
ARM_Unicorn();
explicit ARM_Unicorn(Timing::CoreTiming& core_timing);
~ARM_Unicorn();
void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
Kernel::VMAPermission perms) override;
void UnmapMemory(VAddr address, std::size_t size) override;
@@ -43,6 +48,7 @@ public:
private:
uc_engine* uc{};
Timing::CoreTiming& core_timing;
GDBStub::BreakpointAddress last_bkpt{};
bool last_bkpt_hit;
};

View File

@@ -94,8 +94,8 @@ struct System::Impl {
ResultStatus Init(System& system, Frontend::EmuWindow& emu_window) {
LOG_DEBUG(HW_Memory, "initialized OK");
Timing::Init();
kernel.Initialize();
core_timing.Initialize();
kernel.Initialize(core_timing);
const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::system_clock::now().time_since_epoch());
@@ -120,7 +120,7 @@ struct System::Impl {
telemetry_session = std::make_unique<Core::TelemetrySession>();
service_manager = std::make_shared<Service::SM::ServiceManager>();
Service::Init(service_manager, *virtual_filesystem);
Service::Init(service_manager, system, *virtual_filesystem);
GDBStub::Init();
renderer = VideoCore::CreateRenderer(emu_window, system);
@@ -128,7 +128,7 @@ struct System::Impl {
return ResultStatus::ErrorVideoCore;
}
gpu_core = std::make_unique<Tegra::GPU>(renderer->Rasterizer());
gpu_core = std::make_unique<Tegra::GPU>(system, renderer->Rasterizer());
cpu_core_manager.Initialize(system);
is_powered_on = true;
@@ -205,7 +205,7 @@ struct System::Impl {
// Shutdown kernel and core timing
kernel.Shutdown();
Timing::Shutdown();
core_timing.Shutdown();
// Close app loader
app_loader.reset();
@@ -232,9 +232,10 @@ struct System::Impl {
}
PerfStatsResults GetAndResetPerfStats() {
return perf_stats.GetAndResetStats(Timing::GetGlobalTimeUs());
return perf_stats.GetAndResetStats(core_timing.GetGlobalTimeUs());
}
Timing::CoreTiming core_timing;
Kernel::KernelCore kernel;
/// RealVfsFilesystem instance
FileSys::VirtualFilesystem virtual_filesystem;
@@ -396,6 +397,14 @@ const Kernel::KernelCore& System::Kernel() const {
return impl->kernel;
}
Timing::CoreTiming& System::CoreTiming() {
return impl->core_timing;
}
const Timing::CoreTiming& System::CoreTiming() const {
return impl->core_timing;
}
Core::PerfStats& System::GetPerfStats() {
return impl->perf_stats;
}

View File

@@ -47,6 +47,10 @@ namespace VideoCore {
class RendererBase;
} // namespace VideoCore
namespace Core::Timing {
class CoreTiming;
}
namespace Core {
class ARM_Interface;
@@ -205,6 +209,12 @@ public:
/// Provides a constant pointer to the current process.
const Kernel::Process* CurrentProcess() const;
/// Provides a reference to the core timing instance.
Timing::CoreTiming& CoreTiming();
/// Provides a constant reference to the core timing instance.
const Timing::CoreTiming& CoreTiming() const;
/// Provides a reference to the kernel instance.
Kernel::KernelCore& Kernel();

View File

@@ -49,17 +49,18 @@ bool CpuBarrier::Rendezvous() {
return false;
}
Cpu::Cpu(ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, std::size_t core_index)
: cpu_barrier{cpu_barrier}, core_index{core_index} {
Cpu::Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
CpuBarrier& cpu_barrier, std::size_t core_index)
: cpu_barrier{cpu_barrier}, core_timing{core_timing}, core_index{core_index} {
if (Settings::values.use_cpu_jit) {
#ifdef ARCHITECTURE_x86_64
arm_interface = std::make_unique<ARM_Dynarmic>(exclusive_monitor, core_index);
arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index);
#else
arm_interface = std::make_unique<ARM_Unicorn>();
LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
#endif
} else {
arm_interface = std::make_unique<ARM_Unicorn>();
arm_interface = std::make_unique<ARM_Unicorn>(core_timing);
}
scheduler = std::make_unique<Kernel::Scheduler>(*arm_interface);
@@ -93,14 +94,14 @@ void Cpu::RunLoop(bool tight_loop) {
if (IsMainCore()) {
// TODO(Subv): Only let CoreTiming idle if all 4 cores are idling.
Timing::Idle();
Timing::Advance();
core_timing.Idle();
core_timing.Advance();
}
PrepareReschedule();
} else {
if (IsMainCore()) {
Timing::Advance();
core_timing.Advance();
}
if (tight_loop) {

View File

@@ -15,6 +15,10 @@ namespace Kernel {
class Scheduler;
}
namespace Core::Timing {
class CoreTiming;
}
namespace Core {
class ARM_Interface;
@@ -41,7 +45,8 @@ private:
class Cpu {
public:
Cpu(ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, std::size_t core_index);
Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
CpuBarrier& cpu_barrier, std::size_t core_index);
~Cpu();
void RunLoop(bool tight_loop = true);
@@ -82,6 +87,7 @@ private:
std::unique_ptr<ARM_Interface> arm_interface;
CpuBarrier& cpu_barrier;
std::unique_ptr<Kernel::Scheduler> scheduler;
Timing::CoreTiming& core_timing;
std::atomic<bool> reschedule_pending = false;
std::size_t core_index;

View File

@@ -8,71 +8,60 @@
#include <mutex>
#include <string>
#include <tuple>
#include <unordered_map>
#include <vector>
#include "common/assert.h"
#include "common/thread.h"
#include "common/threadsafe_queue.h"
#include "core/core_timing_util.h"
namespace Core::Timing {
static s64 global_timer;
static int slice_length;
static int downcount;
constexpr int MAX_SLICE_LENGTH = 20000;
struct EventType {
TimedCallback callback;
const std::string* name;
};
struct Event {
struct CoreTiming::Event {
s64 time;
u64 fifo_order;
u64 userdata;
const EventType* type;
// Sort by time, unless the times are the same, in which case sort by
// the order added to the queue
friend bool operator>(const Event& left, const Event& right) {
return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
}
friend bool operator<(const Event& left, const Event& right) {
return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
}
};
// Sort by time, unless the times are the same, in which case sort by the order added to the queue
static bool operator>(const Event& left, const Event& right) {
return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
CoreTiming::CoreTiming() = default;
CoreTiming::~CoreTiming() = default;
void CoreTiming::Initialize() {
downcount = MAX_SLICE_LENGTH;
slice_length = MAX_SLICE_LENGTH;
global_timer = 0;
idled_cycles = 0;
// The time between CoreTiming being initialized and the first call to Advance() is considered
// the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before
// executing the first cycle of each slice to prepare the slice length and downcount for
// that slice.
is_global_timer_sane = true;
event_fifo_id = 0;
const auto empty_timed_callback = [](u64, s64) {};
ev_lost = RegisterEvent("_lost_event", empty_timed_callback);
}
static bool operator<(const Event& left, const Event& right) {
return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
void CoreTiming::Shutdown() {
MoveEvents();
ClearPendingEvents();
UnregisterAllEvents();
}
// unordered_map stores each element separately as a linked list node so pointers to elements
// remain stable regardless of rehashes/resizing.
static std::unordered_map<std::string, EventType> event_types;
// The queue is a min-heap using std::make_heap/push_heap/pop_heap.
// We don't use std::priority_queue because we need to be able to serialize, unserialize and
// erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't accomodated
// by the standard adaptor class.
static std::vector<Event> event_queue;
static u64 event_fifo_id;
// the queue for storing the events from other threads threadsafe until they will be added
// to the event_queue by the emu thread
static Common::MPSCQueue<Event, false> ts_queue;
// the queue for unscheduling the events from other threads threadsafe
static Common::MPSCQueue<std::pair<const EventType*, u64>, false> unschedule_queue;
constexpr int MAX_SLICE_LENGTH = 20000;
static s64 idled_cycles;
// Are we in a function that has been called from Advance()
// If events are sheduled from a function that gets called from Advance(),
// don't change slice_length and downcount.
static bool is_global_timer_sane;
static EventType* ev_lost = nullptr;
static void EmptyTimedCallback(u64 userdata, s64 cyclesLate) {}
EventType* RegisterEvent(const std::string& name, TimedCallback callback) {
EventType* CoreTiming::RegisterEvent(const std::string& name, TimedCallback callback) {
// check for existing type with same name.
// we want event type names to remain unique so that we can use them for serialization.
ASSERT_MSG(event_types.find(name) == event_types.end(),
@@ -86,71 +75,31 @@ EventType* RegisterEvent(const std::string& name, TimedCallback callback) {
return event_type;
}
void UnregisterAllEvents() {
void CoreTiming::UnregisterAllEvents() {
ASSERT_MSG(event_queue.empty(), "Cannot unregister events with events pending");
event_types.clear();
}
void Init() {
downcount = MAX_SLICE_LENGTH;
slice_length = MAX_SLICE_LENGTH;
global_timer = 0;
idled_cycles = 0;
// The time between CoreTiming being intialized and the first call to Advance() is considered
// the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before
// executing the first cycle of each slice to prepare the slice length and downcount for
// that slice.
is_global_timer_sane = true;
event_fifo_id = 0;
ev_lost = RegisterEvent("_lost_event", &EmptyTimedCallback);
}
void Shutdown() {
MoveEvents();
ClearPendingEvents();
UnregisterAllEvents();
}
// This should only be called from the CPU thread. If you are calling
// it from any other thread, you are doing something evil
u64 GetTicks() {
u64 ticks = static_cast<u64>(global_timer);
if (!is_global_timer_sane) {
ticks += slice_length - downcount;
}
return ticks;
}
void AddTicks(u64 ticks) {
downcount -= static_cast<int>(ticks);
}
u64 GetIdleTicks() {
return static_cast<u64>(idled_cycles);
}
void ClearPendingEvents() {
event_queue.clear();
}
void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
void CoreTiming::ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
ASSERT(event_type != nullptr);
s64 timeout = GetTicks() + cycles_into_future;
const s64 timeout = GetTicks() + cycles_into_future;
// If this event needs to be scheduled before the next advance(), force one early
if (!is_global_timer_sane)
if (!is_global_timer_sane) {
ForceExceptionCheck(cycles_into_future);
}
event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
}
void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
void CoreTiming::ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type,
u64 userdata) {
ts_queue.Push(Event{global_timer + cycles_into_future, 0, userdata, event_type});
}
void UnscheduleEvent(const EventType* event_type, u64 userdata) {
auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
void CoreTiming::UnscheduleEvent(const EventType* event_type, u64 userdata) {
const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
return e.type == event_type && e.userdata == userdata;
});
@@ -161,13 +110,33 @@ void UnscheduleEvent(const EventType* event_type, u64 userdata) {
}
}
void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata) {
void CoreTiming::UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata) {
unschedule_queue.Push(std::make_pair(event_type, userdata));
}
void RemoveEvent(const EventType* event_type) {
auto itr = std::remove_if(event_queue.begin(), event_queue.end(),
[&](const Event& e) { return e.type == event_type; });
u64 CoreTiming::GetTicks() const {
u64 ticks = static_cast<u64>(global_timer);
if (!is_global_timer_sane) {
ticks += slice_length - downcount;
}
return ticks;
}
u64 CoreTiming::GetIdleTicks() const {
return static_cast<u64>(idled_cycles);
}
void CoreTiming::AddTicks(u64 ticks) {
downcount -= static_cast<int>(ticks);
}
void CoreTiming::ClearPendingEvents() {
event_queue.clear();
}
void CoreTiming::RemoveEvent(const EventType* event_type) {
const auto itr = std::remove_if(event_queue.begin(), event_queue.end(),
[&](const Event& e) { return e.type == event_type; });
// Removing random items breaks the invariant so we have to re-establish it.
if (itr != event_queue.end()) {
@@ -176,22 +145,24 @@ void RemoveEvent(const EventType* event_type) {
}
}
void RemoveNormalAndThreadsafeEvent(const EventType* event_type) {
void CoreTiming::RemoveNormalAndThreadsafeEvent(const EventType* event_type) {
MoveEvents();
RemoveEvent(event_type);
}
void ForceExceptionCheck(s64 cycles) {
void CoreTiming::ForceExceptionCheck(s64 cycles) {
cycles = std::max<s64>(0, cycles);
if (downcount > cycles) {
// downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int
// here. Account for cycles already executed by adjusting the g.slice_length
slice_length -= downcount - static_cast<int>(cycles);
downcount = static_cast<int>(cycles);
if (downcount <= cycles) {
return;
}
// downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int
// here. Account for cycles already executed by adjusting the g.slice_length
slice_length -= downcount - static_cast<int>(cycles);
downcount = static_cast<int>(cycles);
}
void MoveEvents() {
void CoreTiming::MoveEvents() {
for (Event ev; ts_queue.Pop(ev);) {
ev.fifo_order = event_fifo_id++;
event_queue.emplace_back(std::move(ev));
@@ -199,13 +170,13 @@ void MoveEvents() {
}
}
void Advance() {
void CoreTiming::Advance() {
MoveEvents();
for (std::pair<const EventType*, u64> ev; unschedule_queue.Pop(ev);) {
UnscheduleEvent(ev.first, ev.second);
}
int cycles_executed = slice_length - downcount;
const int cycles_executed = slice_length - downcount;
global_timer += cycles_executed;
slice_length = MAX_SLICE_LENGTH;
@@ -229,16 +200,16 @@ void Advance() {
downcount = slice_length;
}
void Idle() {
void CoreTiming::Idle() {
idled_cycles += downcount;
downcount = 0;
}
std::chrono::microseconds GetGlobalTimeUs() {
std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE};
}
int GetDowncount() {
int CoreTiming::GetDowncount() const {
return downcount;
}

View File

@@ -4,6 +4,27 @@
#pragma once
#include <chrono>
#include <functional>
#include <string>
#include <unordered_map>
#include <vector>
#include "common/common_types.h"
#include "common/threadsafe_queue.h"
namespace Core::Timing {
/// A callback that may be scheduled for a particular core timing event.
using TimedCallback = std::function<void(u64 userdata, int cycles_late)>;
/// Contains the characteristics of a particular event.
struct EventType {
/// The event's callback function.
TimedCallback callback;
/// A pointer to the name of the event.
const std::string* name;
};
/**
* This is a system to schedule events into the emulated machine's future. Time is measured
* in main CPU clock cycles.
@@ -16,80 +37,120 @@
* inside callback:
* ScheduleEvent(periodInCycles - cyclesLate, callback, "whatever")
*/
class CoreTiming {
public:
CoreTiming();
~CoreTiming();
#include <chrono>
#include <functional>
#include <string>
#include "common/common_types.h"
CoreTiming(const CoreTiming&) = delete;
CoreTiming(CoreTiming&&) = delete;
namespace Core::Timing {
CoreTiming& operator=(const CoreTiming&) = delete;
CoreTiming& operator=(CoreTiming&&) = delete;
struct EventType;
/// CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
/// required to end slice - 1 and start slice 0 before the first cycle of code is executed.
void Initialize();
using TimedCallback = std::function<void(u64 userdata, int cycles_late)>;
/// Tears down all timing related functionality.
void Shutdown();
/**
* CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
* required to end slice -1 and start slice 0 before the first cycle of code is executed.
*/
void Init();
void Shutdown();
/// Registers a core timing event with the given name and callback.
///
/// @param name The name of the core timing event to register.
/// @param callback The callback to execute for the event.
///
/// @returns An EventType instance representing the registered event.
///
/// @pre The name of the event being registered must be unique among all
/// registered events.
///
EventType* RegisterEvent(const std::string& name, TimedCallback callback);
/**
* This should only be called from the emu thread, if you are calling it any other thread, you are
* doing something evil
*/
u64 GetTicks();
u64 GetIdleTicks();
void AddTicks(u64 ticks);
/// Unregisters all registered events thus far.
void UnregisterAllEvents();
/**
* Returns the event_type identifier. if name is not unique, it will assert.
*/
EventType* RegisterEvent(const std::string& name, TimedCallback callback);
void UnregisterAllEvents();
/// After the first Advance, the slice lengths and the downcount will be reduced whenever an
/// event is scheduled earlier than the current values.
///
/// Scheduling from a callback will not update the downcount until the Advance() completes.
void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata = 0);
/**
* After the first Advance, the slice lengths and the downcount will be reduced whenever an event
* is scheduled earlier than the current values.
* Scheduling from a callback will not update the downcount until the Advance() completes.
*/
void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata = 0);
/// This is to be called when outside of hle threads, such as the graphics thread, wants to
/// schedule things to be executed on the main thread.
///
/// @note This doesn't change slice_length and thus events scheduled by this might be
/// called with a delay of up to MAX_SLICE_LENGTH
void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type,
u64 userdata = 0);
/**
* This is to be called when outside of hle threads, such as the graphics thread, wants to
* schedule things to be executed on the main thread.
* Not that this doesn't change slice_length and thus events scheduled by this might be called
* with a delay of up to MAX_SLICE_LENGTH
*/
void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata);
void UnscheduleEvent(const EventType* event_type, u64 userdata);
void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata);
void UnscheduleEvent(const EventType* event_type, u64 userdata);
void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata);
/// We only permit one event of each type in the queue at a time.
void RemoveEvent(const EventType* event_type);
void RemoveNormalAndThreadsafeEvent(const EventType* event_type);
/// We only permit one event of each type in the queue at a time.
void RemoveEvent(const EventType* event_type);
void RemoveNormalAndThreadsafeEvent(const EventType* event_type);
void ForceExceptionCheck(s64 cycles);
/** Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends
* the previous timing slice and begins the next one, you must Advance from the previous
* slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an
* Advance() is required to initialize the slice length before the first cycle of emulated
* instructions is executed.
*/
void Advance();
void MoveEvents();
/// This should only be called from the emu thread, if you are calling it any other thread,
/// you are doing something evil
u64 GetTicks() const;
/// Pretend that the main CPU has executed enough cycles to reach the next event.
void Idle();
u64 GetIdleTicks() const;
/// Clear all pending events. This should ONLY be done on exit.
void ClearPendingEvents();
void AddTicks(u64 ticks);
void ForceExceptionCheck(s64 cycles);
/// Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends
/// the previous timing slice and begins the next one, you must Advance from the previous
/// slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an
/// Advance() is required to initialize the slice length before the first cycle of emulated
/// instructions is executed.
void Advance();
std::chrono::microseconds GetGlobalTimeUs();
/// Pretend that the main CPU has executed enough cycles to reach the next event.
void Idle();
int GetDowncount();
std::chrono::microseconds GetGlobalTimeUs() const;
int GetDowncount() const;
private:
struct Event;
/// Clear all pending events. This should ONLY be done on exit.
void ClearPendingEvents();
void MoveEvents();
s64 global_timer = 0;
s64 idled_cycles = 0;
int slice_length = 0;
int downcount = 0;
// Are we in a function that has been called from Advance()
// If events are scheduled from a function that gets called from Advance(),
// don't change slice_length and downcount.
bool is_global_timer_sane = false;
// The queue is a min-heap using std::make_heap/push_heap/pop_heap.
// We don't use std::priority_queue because we need to be able to serialize, unserialize and
// erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't
// accomodated by the standard adaptor class.
std::vector<Event> event_queue;
u64 event_fifo_id = 0;
// Stores each element separately as a linked list node so pointers to elements
// remain stable regardless of rehashes/resizing.
std::unordered_map<std::string, EventType> event_types;
// The queue for storing the events from other threads threadsafe until they will be added
// to the event_queue by the emu thread
Common::MPSCQueue<Event> ts_queue;
// The queue for unscheduling the events from other threads threadsafe
Common::MPSCQueue<std::pair<const EventType*, u64>> unschedule_queue;
EventType* ev_lost = nullptr;
};
} // namespace Core::Timing

View File

@@ -27,7 +27,8 @@ void CpuCoreManager::Initialize(System& system) {
exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size());
for (std::size_t index = 0; index < cores.size(); ++index) {
cores[index] = std::make_unique<Cpu>(*exclusive_monitor, *barrier, index);
cores[index] =
std::make_unique<Cpu>(system.CoreTiming(), *exclusive_monitor, *barrier, index);
}
// Create threads for CPU cores 1-3, and build thread_to_cpu map

View File

@@ -398,7 +398,8 @@ static bool ValidCryptoRevisionString(std::string_view base, size_t begin, size_
}
void KeyManager::LoadFromFile(const std::string& filename, bool is_title_keys) {
std::ifstream file(filename);
std::ifstream file;
OpenFStream(file, filename, std::ios_base::in);
if (!file.is_open())
return;

View File

@@ -47,7 +47,7 @@ std::size_t VectorVfsFile::Write(const u8* data_, std::size_t length, std::size_
if (offset + length > data.size())
data.resize(offset + length);
const auto write = std::min(length, data.size() - offset);
std::memcpy(data.data(), data_, write);
std::memcpy(data.data() + offset, data_, write);
return write;
}

View File

@@ -17,8 +17,7 @@
#include "core/hle/result.h"
#include "core/memory.h"
namespace Kernel {
namespace AddressArbiter {
namespace Kernel::AddressArbiter {
// Performs actual address waiting logic.
static ResultCode WaitForAddress(VAddr address, s64 timeout) {
@@ -176,5 +175,4 @@ ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
return WaitForAddress(address, timeout);
}
} // namespace AddressArbiter
} // namespace Kernel
} // namespace Kernel::AddressArbiter

View File

@@ -8,9 +8,8 @@
union ResultCode;
namespace Kernel {
namespace Kernel::AddressArbiter {
namespace AddressArbiter {
enum class ArbitrationType {
WaitIfLessThan = 0,
DecrementAndWaitIfLessThan = 1,
@@ -29,6 +28,5 @@ ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 valu
ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement);
ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
} // namespace AddressArbiter
} // namespace Kernel
} // namespace Kernel::AddressArbiter

View File

@@ -86,11 +86,11 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
}
struct KernelCore::Impl {
void Initialize(KernelCore& kernel) {
void Initialize(KernelCore& kernel, Core::Timing::CoreTiming& core_timing) {
Shutdown();
InitializeSystemResourceLimit(kernel);
InitializeThreads();
InitializeThreads(core_timing);
}
void Shutdown() {
@@ -122,9 +122,9 @@ struct KernelCore::Impl {
ASSERT(system_resource_limit->SetLimitValue(ResourceType::Sessions, 900).IsSuccess());
}
void InitializeThreads() {
void InitializeThreads(Core::Timing::CoreTiming& core_timing) {
thread_wakeup_event_type =
Core::Timing::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
core_timing.RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback);
}
std::atomic<u32> next_object_id{0};
@@ -152,8 +152,8 @@ KernelCore::~KernelCore() {
Shutdown();
}
void KernelCore::Initialize() {
impl->Initialize(*this);
void KernelCore::Initialize(Core::Timing::CoreTiming& core_timing) {
impl->Initialize(*this, core_timing);
}
void KernelCore::Shutdown() {

View File

@@ -12,8 +12,9 @@ template <typename T>
class ResultVal;
namespace Core::Timing {
class CoreTiming;
struct EventType;
}
} // namespace Core::Timing
namespace Kernel {
@@ -39,7 +40,11 @@ public:
KernelCore& operator=(KernelCore&&) = delete;
/// Resets the kernel to a clean slate for use.
void Initialize();
///
/// @param core_timing CoreTiming instance used to create any necessary
/// kernel-specific callback events.
///
void Initialize(Core::Timing::CoreTiming& core_timing);
/// Clears all resources in use by the kernel instance.
void Shutdown();
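The new doc comment describes the injected CoreTiming parameter. A compressed sketch of the pattern, with stub types standing in for the real yuzu classes:

#include <cstdint>
#include <functional>
#include <string>

struct EventType {};  // opaque event handle, as in Core::Timing

// Stub standing in for Core::Timing::CoreTiming.
class CoreTiming {
public:
    using Callback = std::function<void(std::uint64_t userdata, int cycles_late)>;
    EventType* RegisterEvent(const std::string& /*name*/, Callback /*callback*/) {
        static EventType event;
        return &event;  // the real implementation stores the callback for later dispatch
    }
};

class KernelCore {
public:
    // Before this change, Initialize() reached for free functions in Core::Timing.
    // After it, the caller (Core::System in yuzu) supplies its CoreTiming instance,
    // making the dependency explicit and the class easier to test in isolation.
    void Initialize(CoreTiming& core_timing) {
        thread_wakeup_event =
            core_timing.RegisterEvent("ThreadWakeupCallback", [](std::uint64_t, int) {});
    }

private:
    EventType* thread_wakeup_event = nullptr;
};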

View File

@@ -111,7 +111,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
const u64 prev_switch_ticks = last_context_switch_time;
const u64 most_recent_switch_ticks = Core::Timing::GetTicks();
const u64 most_recent_switch_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
if (thread != nullptr) {

View File

@@ -918,6 +918,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
}
const auto& system = Core::System::GetInstance();
const auto& core_timing = system.CoreTiming();
const auto& scheduler = system.CurrentScheduler();
const auto* const current_thread = scheduler.GetCurrentThread();
const bool same_thread = current_thread == thread;
@@ -927,9 +928,9 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) {
const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks();
out_ticks = thread_ticks + (Core::Timing::GetTicks() - prev_ctx_ticks);
out_ticks = thread_ticks + (core_timing.GetTicks() - prev_ctx_ticks);
} else if (same_thread && info_sub_id == system.CurrentCoreIndex()) {
out_ticks = Core::Timing::GetTicks() - prev_ctx_ticks;
out_ticks = core_timing.GetTicks() - prev_ctx_ticks;
}
*result = out_ticks;
@@ -1546,10 +1547,11 @@ static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to
static u64 GetSystemTick() {
LOG_TRACE(Kernel_SVC, "called");
const u64 result{Core::Timing::GetTicks()};
auto& core_timing = Core::System::GetInstance().CoreTiming();
const u64 result{core_timing.GetTicks()};
// Advance time to defeat dumb games that busy-wait for the frame to end.
Core::Timing::AddTicks(400);
core_timing.AddTicks(400);
return result;
}

View File

@@ -43,7 +43,8 @@ Thread::~Thread() = default;
void Thread::Stop() {
// Cancel any outstanding wakeup events for this thread
Core::Timing::UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), callback_handle);
Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(),
callback_handle);
kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle);
callback_handle = 0;
@@ -85,13 +86,14 @@ void Thread::WakeAfterDelay(s64 nanoseconds) {
// This function might be called from any thread so we have to be cautious and use the
// thread-safe version of ScheduleEvent.
Core::Timing::ScheduleEventThreadsafe(Core::Timing::nsToCycles(nanoseconds),
kernel.ThreadWakeupCallbackEventType(), callback_handle);
Core::System::GetInstance().CoreTiming().ScheduleEventThreadsafe(
Core::Timing::nsToCycles(nanoseconds), kernel.ThreadWakeupCallbackEventType(),
callback_handle);
}
void Thread::CancelWakeupTimer() {
Core::Timing::UnscheduleEventThreadsafe(kernel.ThreadWakeupCallbackEventType(),
callback_handle);
Core::System::GetInstance().CoreTiming().UnscheduleEventThreadsafe(
kernel.ThreadWakeupCallbackEventType(), callback_handle);
}
static std::optional<s32> GetNextProcessorId(u64 mask) {
@@ -190,6 +192,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
return ResultCode(-1);
}
auto& system = Core::System::GetInstance();
SharedPtr<Thread> thread(new Thread(kernel));
thread->thread_id = kernel.CreateNewThreadID();
@@ -198,7 +201,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
thread->stack_top = stack_top;
thread->tpidr_el0 = 0;
thread->nominal_priority = thread->current_priority = priority;
thread->last_running_ticks = Core::Timing::GetTicks();
thread->last_running_ticks = system.CoreTiming().GetTicks();
thread->processor_id = processor_id;
thread->ideal_core = processor_id;
thread->affinity_mask = 1ULL << processor_id;
@@ -209,7 +212,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
thread->name = std::move(name);
thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap();
thread->owner_process = &owner_process;
thread->scheduler = &Core::System::GetInstance().Scheduler(processor_id);
thread->scheduler = &system.Scheduler(processor_id);
thread->scheduler->AddThread(thread, priority);
thread->tls_address = thread->owner_process->MarkNextAvailableTLSSlotAsUsed(*thread);
@@ -258,7 +261,7 @@ void Thread::SetStatus(ThreadStatus new_status) {
}
if (status == ThreadStatus::Running) {
last_running_ticks = Core::Timing::GetTicks();
last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
}
status = new_status;
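The comment in WakeAfterDelay notes that scheduling may happen from any thread, which is why the thread-safe entry point (backed by Common::MPSCQueue in the first hunk of this change) is used. A rough stand-in using a mutex-guarded std::queue, purely to illustrate the producer/consumer split and assuming nothing about the real lock-free queue:

#include <cstdint>
#include <mutex>
#include <queue>

struct EventType;

class ThreadsafeScheduler {
public:
    // Callable from any thread (analogue of ScheduleEventThreadsafe).
    void ScheduleThreadsafe(std::int64_t cycles_into_future, const EventType* type,
                            std::uint64_t userdata) {
        std::lock_guard lock{mutex};
        pending.push({cycles_into_future, type, userdata});
    }

    // Called by the emu thread only; drains pending requests into the real event heap.
    template <typename ScheduleFn>
    void MoveEvents(ScheduleFn&& schedule) {
        std::lock_guard lock{mutex};
        while (!pending.empty()) {
            const auto& req = pending.front();
            schedule(req.cycles, req.type, req.userdata);
            pending.pop();
        }
    }

private:
    struct Request {
        std::int64_t cycles;
        const EventType* type;
        std::uint64_t userdata;
    };
    std::mutex mutex;
    std::queue<Request> pending;
};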

View File

@@ -68,12 +68,12 @@ public:
RegisterHandlers(functions);
// This is the event handle used to check if the audio buffer was released
auto& kernel = Core::System::GetInstance().Kernel();
buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
"IAudioOutBufferReleased");
auto& system = Core::System::GetInstance();
buffer_event = Kernel::WritableEvent::CreateEventPair(
system.Kernel(), Kernel::ResetType::Sticky, "IAudioOutBufferReleased");
stream = audio_core.OpenStream(audio_params.sample_rate, audio_params.channel_count,
std::move(unique_name),
stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate,
audio_params.channel_count, std::move(unique_name),
[=]() { buffer_event.writable->Signal(); });
}

View File

@@ -42,10 +42,11 @@ public:
// clang-format on
RegisterHandlers(functions);
auto& kernel = Core::System::GetInstance().Kernel();
system_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
"IAudioRenderer:SystemEvent");
renderer = std::make_unique<AudioCore::AudioRenderer>(audren_params, system_event.writable);
auto& system = Core::System::GetInstance();
system_event = Kernel::WritableEvent::CreateEventPair(
system.Kernel(), Kernel::ResetType::Sticky, "IAudioRenderer:SystemEvent");
renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params,
system_event.writable);
}
private:

View File

@@ -7,6 +7,10 @@
#include "common/common_types.h"
#include "common/swap.h"
namespace Core::Timing {
class CoreTiming;
}
namespace Service::HID {
class ControllerBase {
public:
@@ -20,7 +24,8 @@ public:
virtual void OnRelease() = 0;
// When the controller is requesting an update for the shared memory
virtual void OnUpdate(u8* data, std::size_t size) = 0;
virtual void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) = 0;
// Called when input devices should be loaded
virtual void OnLoadInputDevices() = 0;

View File

@@ -21,8 +21,9 @@ void Controller_DebugPad::OnInit() {}
void Controller_DebugPad::OnRelease() {}
void Controller_DebugPad::OnUpdate(u8* data, std::size_t size) {
shared_memory.header.timestamp = Core::Timing::GetTicks();
void Controller_DebugPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) {
shared_memory.header.timestamp = core_timing.GetTicks();
shared_memory.header.total_entry_count = 17;
if (!IsControllerActivated()) {

View File

@@ -26,7 +26,7 @@ public:
void OnRelease() override;
// When the controller is requesting an update for the shared memory
void OnUpdate(u8* data, std::size_t size) override;
void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
// Called when input devices should be loaded
void OnLoadInputDevices() override;

View File

@@ -17,8 +17,9 @@ void Controller_Gesture::OnInit() {}
void Controller_Gesture::OnRelease() {}
void Controller_Gesture::OnUpdate(u8* data, std::size_t size) {
shared_memory.header.timestamp = Core::Timing::GetTicks();
void Controller_Gesture::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) {
shared_memory.header.timestamp = core_timing.GetTicks();
shared_memory.header.total_entry_count = 17;
if (!IsControllerActivated()) {

View File

@@ -22,7 +22,7 @@ public:
void OnRelease() override;
// When the controller is requesting an update for the shared memory
void OnUpdate(u8* data, size_t size) override;
void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, size_t size) override;
// Called when input devices should be loaded
void OnLoadInputDevices() override;

View File

@@ -19,8 +19,9 @@ void Controller_Keyboard::OnInit() {}
void Controller_Keyboard::OnRelease() {}
void Controller_Keyboard::OnUpdate(u8* data, std::size_t size) {
shared_memory.header.timestamp = Core::Timing::GetTicks();
void Controller_Keyboard::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) {
shared_memory.header.timestamp = core_timing.GetTicks();
shared_memory.header.total_entry_count = 17;
if (!IsControllerActivated()) {

View File

@@ -25,7 +25,7 @@ public:
void OnRelease() override;
// When the controller is requesting an update for the shared memory
void OnUpdate(u8* data, std::size_t size) override;
void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
// Called when input devices should be loaded
void OnLoadInputDevices() override;

View File

@@ -17,8 +17,9 @@ Controller_Mouse::~Controller_Mouse() = default;
void Controller_Mouse::OnInit() {}
void Controller_Mouse::OnRelease() {}
void Controller_Mouse::OnUpdate(u8* data, std::size_t size) {
shared_memory.header.timestamp = Core::Timing::GetTicks();
void Controller_Mouse::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) {
shared_memory.header.timestamp = core_timing.GetTicks();
shared_memory.header.total_entry_count = 17;
if (!IsControllerActivated()) {

View File

@@ -24,7 +24,7 @@ public:
void OnRelease() override;
// When the controller is requesting an update for the shared memory
void OnUpdate(u8* data, std::size_t size) override;
void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
// Called when input devices should be loaded
void OnLoadInputDevices() override;

View File

@@ -288,7 +288,8 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) {
rstick_entry.y = static_cast<s32>(stick_r_y_f * HID_JOYSTICK_MAX);
}
void Controller_NPad::OnUpdate(u8* data, std::size_t data_len) {
void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t data_len) {
if (!IsControllerActivated())
return;
for (std::size_t i = 0; i < shared_memory_entries.size(); i++) {
@@ -308,7 +309,7 @@ void Controller_NPad::OnUpdate(u8* data, std::size_t data_len) {
const auto& last_entry =
main_controller->npad[main_controller->common.last_entry_index];
main_controller->common.timestamp = Core::Timing::GetTicks();
main_controller->common.timestamp = core_timing.GetTicks();
main_controller->common.last_entry_index =
(main_controller->common.last_entry_index + 1) % 17;

View File

@@ -30,7 +30,7 @@ public:
void OnRelease() override;
// When the controller is requesting an update for the shared memory
void OnUpdate(u8* data, std::size_t size) override;
void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
// Called when input devices should be loaded
void OnLoadInputDevices() override;

View File

@@ -16,13 +16,14 @@ void Controller_Stubbed::OnInit() {}
void Controller_Stubbed::OnRelease() {}
void Controller_Stubbed::OnUpdate(u8* data, std::size_t size) {
void Controller_Stubbed::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) {
if (!smart_update) {
return;
}
CommonHeader header{};
header.timestamp = Core::Timing::GetTicks();
header.timestamp = core_timing.GetTicks();
header.total_entry_count = 17;
header.entry_count = 0;
header.last_entry_index = 0;

View File

@@ -20,7 +20,7 @@ public:
void OnRelease() override;
// When the controller is requesting an update for the shared memory
void OnUpdate(u8* data, std::size_t size) override;
void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
// Called when input devices should be loaded
void OnLoadInputDevices() override;

View File

@@ -20,8 +20,9 @@ void Controller_Touchscreen::OnInit() {}
void Controller_Touchscreen::OnRelease() {}
void Controller_Touchscreen::OnUpdate(u8* data, std::size_t size) {
shared_memory.header.timestamp = Core::Timing::GetTicks();
void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) {
shared_memory.header.timestamp = core_timing.GetTicks();
shared_memory.header.total_entry_count = 17;
if (!IsControllerActivated()) {
@@ -48,7 +49,7 @@ void Controller_Touchscreen::OnUpdate(u8* data, std::size_t size) {
touch_entry.diameter_x = Settings::values.touchscreen.diameter_x;
touch_entry.diameter_y = Settings::values.touchscreen.diameter_y;
touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle;
const u64 tick = Core::Timing::GetTicks();
const u64 tick = core_timing.GetTicks();
touch_entry.delta_time = tick - last_touch;
last_touch = tick;
touch_entry.finger = Settings::values.touchscreen.finger;

View File

@@ -24,7 +24,7 @@ public:
void OnRelease() override;
// When the controller is requesting an update for the shared memory
void OnUpdate(u8* data, std::size_t size) override;
void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
// Called when input devices should be loaded
void OnLoadInputDevices() override;

View File

@@ -17,9 +17,10 @@ void Controller_XPad::OnInit() {}
void Controller_XPad::OnRelease() {}
void Controller_XPad::OnUpdate(u8* data, std::size_t size) {
void Controller_XPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) {
for (auto& xpad_entry : shared_memory.shared_memory_entries) {
xpad_entry.header.timestamp = Core::Timing::GetTicks();
xpad_entry.header.timestamp = core_timing.GetTicks();
xpad_entry.header.total_entry_count = 17;
if (!IsControllerActivated()) {

View File

@@ -22,7 +22,7 @@ public:
void OnRelease() override;
// When the controller is requesting an update for the shared memory
void OnUpdate(u8* data, std::size_t size) override;
void OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data, std::size_t size) override;
// Called when input devices should be loaded
void OnLoadInputDevices() override;

View File

@@ -73,13 +73,15 @@ IAppletResource::IAppletResource() : ServiceFramework("IAppletResource") {
GetController<Controller_Stubbed>(HidController::Unknown3).SetCommonHeaderOffset(0x5000);
// Register update callbacks
pad_update_event = Core::Timing::RegisterEvent(
"HID::UpdatePadCallback",
[this](u64 userdata, int cycles_late) { UpdateControllers(userdata, cycles_late); });
auto& core_timing = Core::System::GetInstance().CoreTiming();
pad_update_event =
core_timing.RegisterEvent("HID::UpdatePadCallback", [this](u64 userdata, int cycles_late) {
UpdateControllers(userdata, cycles_late);
});
// TODO(shinyquagsire23): Other update callbacks? (accel, gyro?)
Core::Timing::ScheduleEvent(pad_update_ticks, pad_update_event);
core_timing.ScheduleEvent(pad_update_ticks, pad_update_event);
ReloadInputDevices();
}
@@ -93,7 +95,7 @@ void IAppletResource::DeactivateController(HidController controller) {
}
IAppletResource ::~IAppletResource() {
Core::Timing::UnscheduleEvent(pad_update_event, 0);
Core::System::GetInstance().CoreTiming().UnscheduleEvent(pad_update_event, 0);
}
void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
@@ -105,15 +107,17 @@ void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
}
void IAppletResource::UpdateControllers(u64 userdata, int cycles_late) {
auto& core_timing = Core::System::GetInstance().CoreTiming();
const bool should_reload = Settings::values.is_device_reload_pending.exchange(false);
for (const auto& controller : controllers) {
if (should_reload) {
controller->OnLoadInputDevices();
}
controller->OnUpdate(shared_mem->GetPointer(), SHARED_MEMORY_SIZE);
controller->OnUpdate(core_timing, shared_mem->GetPointer(), SHARED_MEMORY_SIZE);
}
Core::Timing::ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event);
core_timing.ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event);
}
class IActiveVibrationDeviceList final : public ServiceFramework<IActiveVibrationDeviceList> {
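Registering the pad-update callback and re-arming it with pad_update_ticks - cycles_late keeps the effective update rate stable even when Advance() runs a callback late. A self-contained sketch of that re-arming pattern with stubbed timing types (the real signatures live in core_timing.h):

#include <cstdint>
#include <functional>
#include <utility>

struct EventType;

// Stub with the same call shape as Core::Timing::CoreTiming, for illustration only.
struct CoreTimingStub {
    using Callback = std::function<void(std::uint64_t userdata, int cycles_late)>;
    EventType* RegisterEvent(const char* /*name*/, Callback callback) {
        stored = std::move(callback);
        return nullptr;
    }
    void ScheduleEvent(std::int64_t /*cycles_into_future*/, EventType* /*event*/,
                       std::uint64_t /*userdata*/ = 0) {}
    Callback stored;
};

class AppletResourceSketch {
public:
    explicit AppletResourceSketch(CoreTimingStub& core_timing) : core_timing{core_timing} {
        pad_update_event = core_timing.RegisterEvent(
            "HID::UpdatePadCallback", [this](std::uint64_t userdata, int cycles_late) {
                UpdateControllers(userdata, cycles_late);
            });
        core_timing.ScheduleEvent(pad_update_ticks, pad_update_event);
    }

private:
    void UpdateControllers(std::uint64_t /*userdata*/, int cycles_late) {
        // ... write controller state into shared memory here ...
        // Re-arm, subtracting how late this invocation ran so the period stays constant.
        core_timing.ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event);
    }

    static constexpr std::int64_t pad_update_ticks = 1 << 16;  // placeholder period
    CoreTimingStub& core_timing;
    EventType* pad_update_event = nullptr;
};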

View File

@@ -98,7 +98,7 @@ void IRS::GetImageTransferProcessorState(Kernel::HLERequestContext& ctx) {
IPC::ResponseBuilder rb{ctx, 5};
rb.Push(RESULT_SUCCESS);
rb.PushRaw<u64>(Core::Timing::GetTicks());
rb.PushRaw<u64>(Core::System::GetInstance().CoreTiming().GetTicks());
rb.PushRaw<u32>(0);
}

View File

@@ -5,6 +5,7 @@
#include <cstring>
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h"
@@ -184,7 +185,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o
IoctlGetGpuTime params{};
std::memcpy(&params, input.data(), input.size());
params.gpu_time = Core::Timing::cyclesToNs(Core::Timing::GetTicks());
params.gpu_time = Core::Timing::cyclesToNs(Core::System::GetInstance().CoreTiming().GetTicks());
std::memcpy(output.data(), &params, output.size());
return 0;
}

View File

@@ -14,11 +14,12 @@
#include "core/core_timing_util.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/readable_event.h"
#include "core/hle/kernel/writable_event.h"
#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
#include "core/hle/service/nvdrv/nvdrv.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
#include "core/hle/service/nvflinger/nvflinger.h"
#include "core/hle/service/vi/display/vi_display.h"
#include "core/hle/service/vi/layer/vi_layer.h"
#include "core/perf_stats.h"
#include "video_core/renderer_base.h"
@@ -27,19 +28,25 @@ namespace Service::NVFlinger {
constexpr std::size_t SCREEN_REFRESH_RATE = 60;
constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
NVFlinger::NVFlinger() {
NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
displays.emplace_back(0, "Default");
displays.emplace_back(1, "External");
displays.emplace_back(2, "Edid");
displays.emplace_back(3, "Internal");
displays.emplace_back(4, "Null");
// Schedule the screen composition events
composition_event =
Core::Timing::RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
Compose();
Core::Timing::ScheduleEvent(frame_ticks - cycles_late, composition_event);
this->core_timing.ScheduleEvent(frame_ticks - cycles_late, composition_event);
});
Core::Timing::ScheduleEvent(frame_ticks, composition_event);
core_timing.ScheduleEvent(frame_ticks, composition_event);
}
NVFlinger::~NVFlinger() {
Core::Timing::UnscheduleEvent(composition_event, 0);
core_timing.UnscheduleEvent(composition_event, 0);
}
void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
@@ -52,13 +59,14 @@ std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
// TODO(Subv): Currently we only support the Default display.
ASSERT(name == "Default");
const auto itr = std::find_if(displays.begin(), displays.end(),
[&](const Display& display) { return display.name == name; });
const auto itr =
std::find_if(displays.begin(), displays.end(),
[&](const VI::Display& display) { return display.GetName() == name; });
if (itr == displays.end()) {
return {};
}
return itr->id;
return itr->GetID();
}
std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
@@ -68,13 +76,10 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
return {};
}
ASSERT_MSG(display->layers.empty(), "Only one layer is supported per display at the moment");
const u64 layer_id = next_layer_id++;
const u32 buffer_queue_id = next_buffer_queue_id++;
auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id);
display->layers.emplace_back(layer_id, buffer_queue);
buffer_queues.emplace_back(std::move(buffer_queue));
buffer_queues.emplace_back(buffer_queue_id, layer_id);
display->CreateLayer(layer_id, buffer_queues.back());
return layer_id;
}
@@ -85,7 +90,7 @@ std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) co
return {};
}
return layer->buffer_queue->GetId();
return layer->GetBufferQueue().GetId();
}
Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const {
@@ -95,20 +100,29 @@ Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_i
return nullptr;
}
return display->vsync_event.readable;
return display->GetVSyncEvent();
}
std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const {
BufferQueue& NVFlinger::FindBufferQueue(u32 id) {
const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
[&](const auto& queue) { return queue->GetId() == id; });
[id](const auto& queue) { return queue.GetId() == id; });
ASSERT(itr != buffer_queues.end());
return *itr;
}
Display* NVFlinger::FindDisplay(u64 display_id) {
const auto itr = std::find_if(displays.begin(), displays.end(),
[&](const Display& display) { return display.id == display_id; });
const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const {
const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
[id](const auto& queue) { return queue.GetId() == id; });
ASSERT(itr != buffer_queues.end());
return *itr;
}
VI::Display* NVFlinger::FindDisplay(u64 display_id) {
const auto itr =
std::find_if(displays.begin(), displays.end(),
[&](const VI::Display& display) { return display.GetID() == display_id; });
if (itr == displays.end()) {
return nullptr;
@@ -117,9 +131,10 @@ Display* NVFlinger::FindDisplay(u64 display_id) {
return &*itr;
}
const Display* NVFlinger::FindDisplay(u64 display_id) const {
const auto itr = std::find_if(displays.begin(), displays.end(),
[&](const Display& display) { return display.id == display_id; });
const VI::Display* NVFlinger::FindDisplay(u64 display_id) const {
const auto itr =
std::find_if(displays.begin(), displays.end(),
[&](const VI::Display& display) { return display.GetID() == display_id; });
if (itr == displays.end()) {
return nullptr;
@@ -128,57 +143,41 @@ const Display* NVFlinger::FindDisplay(u64 display_id) const {
return &*itr;
}
Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
auto* const display = FindDisplay(display_id);
if (display == nullptr) {
return nullptr;
}
const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
[&](const Layer& layer) { return layer.id == layer_id; });
if (itr == display->layers.end()) {
return nullptr;
}
return &*itr;
return display->FindLayer(layer_id);
}
const Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
const auto* const display = FindDisplay(display_id);
if (display == nullptr) {
return nullptr;
}
const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
[&](const Layer& layer) { return layer.id == layer_id; });
if (itr == display->layers.end()) {
return nullptr;
}
return &*itr;
return display->FindLayer(layer_id);
}
void NVFlinger::Compose() {
for (auto& display : displays) {
// Trigger vsync for this display at the end of drawing
SCOPE_EXIT({ display.vsync_event.writable->Signal(); });
SCOPE_EXIT({ display.SignalVSyncEvent(); });
// Don't do anything for displays without layers.
if (display.layers.empty())
if (!display.HasLayers())
continue;
// TODO(Subv): Support more than 1 layer.
ASSERT_MSG(display.layers.size() == 1, "Max 1 layer per display is supported");
Layer& layer = display.layers[0];
auto& buffer_queue = layer.buffer_queue;
VI::Layer& layer = display.GetLayer(0);
auto& buffer_queue = layer.GetBufferQueue();
// Search for a queued buffer and acquire it
auto buffer = buffer_queue->AcquireBuffer();
auto buffer = buffer_queue.AcquireBuffer();
MicroProfileFlip();
@@ -203,19 +202,8 @@ void NVFlinger::Compose() {
igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
buffer->get().transform, buffer->get().crop_rect);
buffer_queue->ReleaseBuffer(buffer->get().slot);
buffer_queue.ReleaseBuffer(buffer->get().slot);
}
}
Layer::Layer(u64 id, std::shared_ptr<BufferQueue> queue) : id(id), buffer_queue(std::move(queue)) {}
Layer::~Layer() = default;
Display::Display(u64 id, std::string name) : id(id), name(std::move(name)) {
auto& kernel = Core::System::GetInstance().Kernel();
vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
fmt::format("Display VSync Event {}", id));
}
Display::~Display() = default;
} // namespace Service::NVFlinger
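FindDisplay and FindLayer above both reduce to the same lookup idiom: std::find_if over a container of value types, returning a pointer or nullptr. In isolation, with a trivial stand-in struct:

#include <algorithm>
#include <cstdint>
#include <vector>

struct DisplayStub {
    std::uint64_t id;
};

DisplayStub* FindById(std::vector<DisplayStub>& displays, std::uint64_t display_id) {
    const auto itr =
        std::find_if(displays.begin(), displays.end(),
                     [display_id](const DisplayStub& d) { return d.id == display_id; });
    return itr == displays.end() ? nullptr : &*itr;
}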

View File

@@ -4,7 +4,6 @@
#pragma once
#include <array>
#include <memory>
#include <optional>
#include <string>
@@ -15,8 +14,9 @@
#include "core/hle/kernel/object.h"
namespace Core::Timing {
class CoreTiming;
struct EventType;
}
} // namespace Core::Timing
namespace Kernel {
class ReadableEvent;
@@ -25,34 +25,20 @@ class WritableEvent;
namespace Service::Nvidia {
class Module;
}
} // namespace Service::Nvidia
namespace Service::VI {
class Display;
class Layer;
} // namespace Service::VI
namespace Service::NVFlinger {
class BufferQueue;
struct Layer {
Layer(u64 id, std::shared_ptr<BufferQueue> queue);
~Layer();
u64 id;
std::shared_ptr<BufferQueue> buffer_queue;
};
struct Display {
Display(u64 id, std::string name);
~Display();
u64 id;
std::string name;
std::vector<Layer> layers;
Kernel::EventPair vsync_event;
};
class NVFlinger final {
public:
NVFlinger();
explicit NVFlinger(Core::Timing::CoreTiming& core_timing);
~NVFlinger();
/// Sets the NVDrv module instance to use to send buffers to the GPU.
@@ -79,7 +65,10 @@ public:
Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const;
/// Obtains a buffer queue identified by the ID.
std::shared_ptr<BufferQueue> FindBufferQueue(u32 id) const;
BufferQueue& FindBufferQueue(u32 id);
/// Obtains a buffer queue identified by the ID.
const BufferQueue& FindBufferQueue(u32 id) const;
/// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
/// finished.
@@ -87,27 +76,21 @@ public:
private:
/// Finds the display identified by the specified ID.
Display* FindDisplay(u64 display_id);
VI::Display* FindDisplay(u64 display_id);
/// Finds the display identified by the specified ID.
const Display* FindDisplay(u64 display_id) const;
const VI::Display* FindDisplay(u64 display_id) const;
/// Finds the layer identified by the specified ID in the desired display.
Layer* FindLayer(u64 display_id, u64 layer_id);
VI::Layer* FindLayer(u64 display_id, u64 layer_id);
/// Finds the layer identified by the specified ID in the desired display.
const Layer* FindLayer(u64 display_id, u64 layer_id) const;
const VI::Layer* FindLayer(u64 display_id, u64 layer_id) const;
std::shared_ptr<Nvidia::Module> nvdrv;
std::array<Display, 5> displays{{
{0, "Default"},
{1, "External"},
{2, "Edid"},
{3, "Internal"},
{4, "Null"},
}};
std::vector<std::shared_ptr<BufferQueue>> buffer_queues;
std::vector<VI::Display> displays;
std::vector<BufferQueue> buffer_queues;
/// Id to use for the next layer that is created, this counter is shared among all displays.
u64 next_layer_id = 1;
@@ -117,6 +100,9 @@ private:
/// Event that handles screen composition.
Core::Timing::EventType* composition_event;
/// Core timing instance for registering/unregistering the composition event.
Core::Timing::CoreTiming& core_timing;
};
} // namespace Service::NVFlinger

View File

@@ -194,10 +194,11 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co
// Module interface
/// Initialize ServiceManager
void Init(std::shared_ptr<SM::ServiceManager>& sm, FileSys::VfsFilesystem& vfs) {
void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system,
FileSys::VfsFilesystem& vfs) {
// NVFlinger needs to be accessed by several services like Vi and AppletOE so we instantiate it
// here and pass it into the respective InstallInterfaces functions.
auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>();
auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>(system.CoreTiming());
SM::ServiceManager::InstallInterfaces(sm);

View File

@@ -14,6 +14,14 @@
////////////////////////////////////////////////////////////////////////////////////////////////////
// Namespace Service
namespace Core {
class System;
}
namespace FileSys {
class VfsFilesystem;
}
namespace Kernel {
class ClientPort;
class ServerPort;
@@ -21,10 +29,6 @@ class ServerSession;
class HLERequestContext;
} // namespace Kernel
namespace FileSys {
class VfsFilesystem;
}
namespace Service {
namespace SM {
@@ -178,7 +182,8 @@ private:
};
/// Initialize ServiceManager
void Init(std::shared_ptr<SM::ServiceManager>& sm, FileSys::VfsFilesystem& vfs);
void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system,
FileSys::VfsFilesystem& vfs);
/// Shutdown ServiceManager
void Shutdown();

View File

@@ -5,6 +5,7 @@
#include <chrono>
#include <ctime>
#include "common/logging/log.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/hle/ipc_helpers.h"
@@ -106,8 +107,9 @@ private:
void GetCurrentTimePoint(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Time, "called");
const auto& core_timing = Core::System::GetInstance().CoreTiming();
const SteadyClockTimePoint steady_clock_time_point{
Core::Timing::cyclesToMs(Core::Timing::GetTicks()) / 1000};
Core::Timing::cyclesToMs(core_timing.GetTicks()) / 1000};
IPC::ResponseBuilder rb{ctx, (sizeof(SteadyClockTimePoint) / 4) + 2};
rb.Push(RESULT_SUCCESS);
rb.PushRaw(steady_clock_time_point);
@@ -281,8 +283,9 @@ void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) {
return;
}
const auto& core_timing = Core::System::GetInstance().CoreTiming();
const SteadyClockTimePoint steady_clock_time_point{
Core::Timing::cyclesToMs(Core::Timing::GetTicks()) / 1000, {}};
Core::Timing::cyclesToMs(core_timing.GetTicks()) / 1000, {}};
CalendarTime calendar_time{};
calendar_time.year = tm->tm_year + 1900;

View File

@@ -0,0 +1,71 @@
// Copyright 2019 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <utility>
#include <fmt/format.h>
#include "common/assert.h"
#include "core/core.h"
#include "core/hle/kernel/readable_event.h"
#include "core/hle/service/vi/display/vi_display.h"
#include "core/hle/service/vi/layer/vi_layer.h"
namespace Service::VI {
Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} {
auto& kernel = Core::System::GetInstance().Kernel();
vsync_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Sticky,
fmt::format("Display VSync Event {}", id));
}
Display::~Display() = default;
Layer& Display::GetLayer(std::size_t index) {
return layers.at(index);
}
const Layer& Display::GetLayer(std::size_t index) const {
return layers.at(index);
}
Kernel::SharedPtr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
return vsync_event.readable;
}
void Display::SignalVSyncEvent() {
vsync_event.writable->Signal();
}
void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
// TODO(Subv): Support more than 1 layer.
ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
layers.emplace_back(id, buffer_queue);
}
Layer* Display::FindLayer(u64 id) {
const auto itr = std::find_if(layers.begin(), layers.end(),
[id](const VI::Layer& layer) { return layer.GetID() == id; });
if (itr == layers.end()) {
return nullptr;
}
return &*itr;
}
const Layer* Display::FindLayer(u64 id) const {
const auto itr = std::find_if(layers.begin(), layers.end(),
[id](const VI::Layer& layer) { return layer.GetID() == id; });
if (itr == layers.end()) {
return nullptr;
}
return &*itr;
}
} // namespace Service::VI

View File

@@ -0,0 +1,98 @@
// Copyright 2019 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string>
#include <vector>
#include "common/common_types.h"
#include "core/hle/kernel/writable_event.h"
namespace Service::NVFlinger {
class BufferQueue;
}
namespace Service::VI {
class Layer;
/// Represents a single display type
class Display {
public:
/// Constructs a display with a given unique ID and name.
///
/// @param id The unique ID for this display.
/// @param name The name for this display.
///
Display(u64 id, std::string name);
~Display();
Display(const Display&) = delete;
Display& operator=(const Display&) = delete;
Display(Display&&) = default;
Display& operator=(Display&&) = default;
/// Gets the unique ID assigned to this display.
u64 GetID() const {
return id;
}
/// Gets the name of this display
const std::string& GetName() const {
return name;
}
/// Whether or not this display has any layers added to it.
bool HasLayers() const {
return !layers.empty();
}
/// Gets a layer for this display based off an index.
Layer& GetLayer(std::size_t index);
/// Gets a layer for this display based off an index.
const Layer& GetLayer(std::size_t index) const;
/// Gets the readable vsync event.
Kernel::SharedPtr<Kernel::ReadableEvent> GetVSyncEvent() const;
/// Signals the internal vsync event.
void SignalVSyncEvent();
/// Creates and adds a layer to this display with the given ID.
///
/// @param id The ID to assign to the created layer.
/// @param buffer_queue The buffer queue for the layer instance to use.
///
void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);
/// Attempts to find a layer with the given ID.
///
/// @param id The layer ID.
///
/// @returns If found, the Layer instance with the given ID.
/// If not found, then nullptr is returned.
///
Layer* FindLayer(u64 id);
/// Attempts to find a layer with the given ID.
///
/// @param id The layer ID.
///
/// @returns If found, the Layer instance with the given ID.
/// If not found, then nullptr is returned.
///
const Layer* FindLayer(u64 id) const;
private:
u64 id;
std::string name;
std::vector<Layer> layers;
Kernel::EventPair vsync_event;
};
} // namespace Service::VI

View File

@@ -0,0 +1,13 @@
// Copyright 2019 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/hle/service/vi/layer/vi_layer.h"
namespace Service::VI {
Layer::Layer(u64 id, NVFlinger::BufferQueue& queue) : id{id}, buffer_queue{queue} {}
Layer::~Layer() = default;
} // namespace Service::VI

View File

@@ -0,0 +1,52 @@
// Copyright 2019 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
namespace Service::NVFlinger {
class BufferQueue;
}
namespace Service::VI {
/// Represents a single display layer.
class Layer {
public:
/// Constructs a layer with a given ID and buffer queue.
///
/// @param id The ID to assign to this layer.
/// @param queue The buffer queue for this layer to use.
///
Layer(u64 id, NVFlinger::BufferQueue& queue);
~Layer();
Layer(const Layer&) = delete;
Layer& operator=(const Layer&) = delete;
Layer(Layer&&) = default;
Layer& operator=(Layer&&) = delete;
/// Gets the ID for this layer.
u64 GetID() const {
return id;
}
/// Gets a reference to the buffer queue this layer is using.
NVFlinger::BufferQueue& GetBufferQueue() {
return buffer_queue;
}
/// Gets a const reference to the buffer queue this layer is using.
const NVFlinger::BufferQueue& GetBufferQueue() const {
return buffer_queue;
}
private:
u64 id;
NVFlinger::BufferQueue& buffer_queue;
};
} // namespace Service::VI

View File

@@ -525,7 +525,7 @@ private:
LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
static_cast<u32>(transaction), flags);
auto buffer_queue = nv_flinger->FindBufferQueue(id);
auto& buffer_queue = nv_flinger->FindBufferQueue(id);
if (transaction == TransactionId::Connect) {
IGBPConnectRequestParcel request{ctx.ReadBuffer()};
@@ -538,7 +538,7 @@ private:
} else if (transaction == TransactionId::SetPreallocatedBuffer) {
IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()};
buffer_queue->SetPreallocatedBuffer(request.data.slot, request.buffer);
buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer);
IGBPSetPreallocatedBufferResponseParcel response{};
ctx.WriteBuffer(response.Serialize());
@@ -546,7 +546,7 @@ private:
IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
const u32 width{request.data.width};
const u32 height{request.data.height};
std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
if (slot) {
// Buffer is available
@@ -559,8 +559,8 @@ private:
[=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
Kernel::ThreadWakeupReason reason) {
// Repeat TransactParcel DequeueBuffer when a buffer is available
auto buffer_queue = nv_flinger->FindBufferQueue(id);
std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
auto& buffer_queue = nv_flinger->FindBufferQueue(id);
std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer.");
IGBPDequeueBufferResponseParcel response{*slot};
@@ -568,28 +568,28 @@ private:
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
},
buffer_queue->GetWritableBufferWaitEvent());
buffer_queue.GetWritableBufferWaitEvent());
}
} else if (transaction == TransactionId::RequestBuffer) {
IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};
auto& buffer = buffer_queue->RequestBuffer(request.slot);
auto& buffer = buffer_queue.RequestBuffer(request.slot);
IGBPRequestBufferResponseParcel response{buffer};
ctx.WriteBuffer(response.Serialize());
} else if (transaction == TransactionId::QueueBuffer) {
IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};
buffer_queue->QueueBuffer(request.data.slot, request.data.transform,
request.data.GetCropRect());
buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
request.data.GetCropRect());
IGBPQueueBufferResponseParcel response{1280, 720};
ctx.WriteBuffer(response.Serialize());
} else if (transaction == TransactionId::Query) {
IGBPQueryRequestParcel request{ctx.ReadBuffer()};
u32 value =
buffer_queue->Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));
const u32 value =
buffer_queue.Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));
IGBPQueryResponseParcel response{value};
ctx.WriteBuffer(response.Serialize());
@@ -629,12 +629,12 @@ private:
LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown);
const auto buffer_queue = nv_flinger->FindBufferQueue(id);
const auto& buffer_queue = nv_flinger->FindBufferQueue(id);
// TODO(Subv): Find out what this actually is.
IPC::ResponseBuilder rb{ctx, 2, 1};
rb.Push(RESULT_SUCCESS);
rb.PushCopyObjects(buffer_queue->GetBufferWaitEvent());
rb.PushCopyObjects(buffer_queue.GetBufferWaitEvent());
}
std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
@@ -752,6 +752,7 @@ public:
{1102, nullptr, "GetDisplayResolution"},
{2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"},
{2011, nullptr, "DestroyManagedLayer"},
{2012, nullptr, "CreateStrayLayer"},
{2050, nullptr, "CreateIndirectLayer"},
{2051, nullptr, "DestroyIndirectLayer"},
{2052, nullptr, "CreateIndirectProducerEndPoint"},

View File

@@ -28,100 +28,103 @@ void CallbackTemplate(u64 userdata, s64 cycles_late) {
REQUIRE(lateness == cycles_late);
}
class ScopeInit final {
public:
struct ScopeInit final {
ScopeInit() {
Core::Timing::Init();
core_timing.Initialize();
}
~ScopeInit() {
Core::Timing::Shutdown();
core_timing.Shutdown();
}
Core::Timing::CoreTiming core_timing;
};
static void AdvanceAndCheck(u32 idx, int downcount, int expected_lateness = 0,
int cpu_downcount = 0) {
static void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, int downcount,
int expected_lateness = 0, int cpu_downcount = 0) {
callbacks_ran_flags = 0;
expected_callback = CB_IDS[idx];
lateness = expected_lateness;
// Pretend we executed X cycles of instructions.
Core::Timing::AddTicks(Core::Timing::GetDowncount() - cpu_downcount);
Core::Timing::Advance();
core_timing.AddTicks(core_timing.GetDowncount() - cpu_downcount);
core_timing.Advance();
REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags);
REQUIRE(downcount == Core::Timing::GetDowncount());
REQUIRE(downcount == core_timing.GetDowncount());
}
TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
ScopeInit guard;
auto& core_timing = guard.core_timing;
Core::Timing::EventType* cb_a = Core::Timing::RegisterEvent("callbackA", CallbackTemplate<0>);
Core::Timing::EventType* cb_b = Core::Timing::RegisterEvent("callbackB", CallbackTemplate<1>);
Core::Timing::EventType* cb_c = Core::Timing::RegisterEvent("callbackC", CallbackTemplate<2>);
Core::Timing::EventType* cb_d = Core::Timing::RegisterEvent("callbackD", CallbackTemplate<3>);
Core::Timing::EventType* cb_e = Core::Timing::RegisterEvent("callbackE", CallbackTemplate<4>);
Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", CallbackTemplate<3>);
Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>);
// Enter slice 0
Core::Timing::Advance();
core_timing.Advance();
// D -> B -> C -> A -> E
Core::Timing::ScheduleEvent(1000, cb_a, CB_IDS[0]);
REQUIRE(1000 == Core::Timing::GetDowncount());
Core::Timing::ScheduleEvent(500, cb_b, CB_IDS[1]);
REQUIRE(500 == Core::Timing::GetDowncount());
Core::Timing::ScheduleEvent(800, cb_c, CB_IDS[2]);
REQUIRE(500 == Core::Timing::GetDowncount());
Core::Timing::ScheduleEvent(100, cb_d, CB_IDS[3]);
REQUIRE(100 == Core::Timing::GetDowncount());
Core::Timing::ScheduleEvent(1200, cb_e, CB_IDS[4]);
REQUIRE(100 == Core::Timing::GetDowncount());
core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]);
REQUIRE(1000 == core_timing.GetDowncount());
core_timing.ScheduleEvent(500, cb_b, CB_IDS[1]);
REQUIRE(500 == core_timing.GetDowncount());
core_timing.ScheduleEvent(800, cb_c, CB_IDS[2]);
REQUIRE(500 == core_timing.GetDowncount());
core_timing.ScheduleEvent(100, cb_d, CB_IDS[3]);
REQUIRE(100 == core_timing.GetDowncount());
core_timing.ScheduleEvent(1200, cb_e, CB_IDS[4]);
REQUIRE(100 == core_timing.GetDowncount());
AdvanceAndCheck(3, 400);
AdvanceAndCheck(1, 300);
AdvanceAndCheck(2, 200);
AdvanceAndCheck(0, 200);
AdvanceAndCheck(4, MAX_SLICE_LENGTH);
AdvanceAndCheck(core_timing, 3, 400);
AdvanceAndCheck(core_timing, 1, 300);
AdvanceAndCheck(core_timing, 2, 200);
AdvanceAndCheck(core_timing, 0, 200);
AdvanceAndCheck(core_timing, 4, MAX_SLICE_LENGTH);
}
TEST_CASE("CoreTiming[Threadsave]", "[core]") {
ScopeInit guard;
auto& core_timing = guard.core_timing;
Core::Timing::EventType* cb_a = Core::Timing::RegisterEvent("callbackA", CallbackTemplate<0>);
Core::Timing::EventType* cb_b = Core::Timing::RegisterEvent("callbackB", CallbackTemplate<1>);
Core::Timing::EventType* cb_c = Core::Timing::RegisterEvent("callbackC", CallbackTemplate<2>);
Core::Timing::EventType* cb_d = Core::Timing::RegisterEvent("callbackD", CallbackTemplate<3>);
Core::Timing::EventType* cb_e = Core::Timing::RegisterEvent("callbackE", CallbackTemplate<4>);
Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", CallbackTemplate<3>);
Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>);
// Enter slice 0
Core::Timing::Advance();
core_timing.Advance();
// D -> B -> C -> A -> E
Core::Timing::ScheduleEventThreadsafe(1000, cb_a, CB_IDS[0]);
core_timing.ScheduleEventThreadsafe(1000, cb_a, CB_IDS[0]);
// Manually force since ScheduleEventThreadsafe doesn't call it
Core::Timing::ForceExceptionCheck(1000);
REQUIRE(1000 == Core::Timing::GetDowncount());
Core::Timing::ScheduleEventThreadsafe(500, cb_b, CB_IDS[1]);
core_timing.ForceExceptionCheck(1000);
REQUIRE(1000 == core_timing.GetDowncount());
core_timing.ScheduleEventThreadsafe(500, cb_b, CB_IDS[1]);
// Manually force since ScheduleEventThreadsafe doesn't call it
Core::Timing::ForceExceptionCheck(500);
REQUIRE(500 == Core::Timing::GetDowncount());
Core::Timing::ScheduleEventThreadsafe(800, cb_c, CB_IDS[2]);
core_timing.ForceExceptionCheck(500);
REQUIRE(500 == core_timing.GetDowncount());
core_timing.ScheduleEventThreadsafe(800, cb_c, CB_IDS[2]);
// Manually force since ScheduleEventThreadsafe doesn't call it
Core::Timing::ForceExceptionCheck(800);
REQUIRE(500 == Core::Timing::GetDowncount());
Core::Timing::ScheduleEventThreadsafe(100, cb_d, CB_IDS[3]);
core_timing.ForceExceptionCheck(800);
REQUIRE(500 == core_timing.GetDowncount());
core_timing.ScheduleEventThreadsafe(100, cb_d, CB_IDS[3]);
// Manually force since ScheduleEventThreadsafe doesn't call it
Core::Timing::ForceExceptionCheck(100);
REQUIRE(100 == Core::Timing::GetDowncount());
Core::Timing::ScheduleEventThreadsafe(1200, cb_e, CB_IDS[4]);
core_timing.ForceExceptionCheck(100);
REQUIRE(100 == core_timing.GetDowncount());
core_timing.ScheduleEventThreadsafe(1200, cb_e, CB_IDS[4]);
// Manually force since ScheduleEventThreadsafe doesn't call it
Core::Timing::ForceExceptionCheck(1200);
REQUIRE(100 == Core::Timing::GetDowncount());
core_timing.ForceExceptionCheck(1200);
REQUIRE(100 == core_timing.GetDowncount());
AdvanceAndCheck(3, 400);
AdvanceAndCheck(1, 300);
AdvanceAndCheck(2, 200);
AdvanceAndCheck(0, 200);
AdvanceAndCheck(4, MAX_SLICE_LENGTH);
AdvanceAndCheck(core_timing, 3, 400);
AdvanceAndCheck(core_timing, 1, 300);
AdvanceAndCheck(core_timing, 2, 200);
AdvanceAndCheck(core_timing, 0, 200);
AdvanceAndCheck(core_timing, 4, MAX_SLICE_LENGTH);
}
namespace SharedSlotTest {
@@ -142,59 +145,62 @@ TEST_CASE("CoreTiming[SharedSlot]", "[core]") {
using namespace SharedSlotTest;
ScopeInit guard;
auto& core_timing = guard.core_timing;
Core::Timing::EventType* cb_a = Core::Timing::RegisterEvent("callbackA", FifoCallback<0>);
Core::Timing::EventType* cb_b = Core::Timing::RegisterEvent("callbackB", FifoCallback<1>);
Core::Timing::EventType* cb_c = Core::Timing::RegisterEvent("callbackC", FifoCallback<2>);
Core::Timing::EventType* cb_d = Core::Timing::RegisterEvent("callbackD", FifoCallback<3>);
Core::Timing::EventType* cb_e = Core::Timing::RegisterEvent("callbackE", FifoCallback<4>);
Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", FifoCallback<0>);
Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", FifoCallback<1>);
Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", FifoCallback<2>);
Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", FifoCallback<3>);
Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", FifoCallback<4>);
Core::Timing::ScheduleEvent(1000, cb_a, CB_IDS[0]);
Core::Timing::ScheduleEvent(1000, cb_b, CB_IDS[1]);
Core::Timing::ScheduleEvent(1000, cb_c, CB_IDS[2]);
Core::Timing::ScheduleEvent(1000, cb_d, CB_IDS[3]);
Core::Timing::ScheduleEvent(1000, cb_e, CB_IDS[4]);
core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]);
core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]);
core_timing.ScheduleEvent(1000, cb_c, CB_IDS[2]);
core_timing.ScheduleEvent(1000, cb_d, CB_IDS[3]);
core_timing.ScheduleEvent(1000, cb_e, CB_IDS[4]);
// Enter slice 0
Core::Timing::Advance();
REQUIRE(1000 == Core::Timing::GetDowncount());
core_timing.Advance();
REQUIRE(1000 == core_timing.GetDowncount());
callbacks_ran_flags = 0;
counter = 0;
lateness = 0;
Core::Timing::AddTicks(Core::Timing::GetDowncount());
Core::Timing::Advance();
REQUIRE(MAX_SLICE_LENGTH == Core::Timing::GetDowncount());
core_timing.AddTicks(core_timing.GetDowncount());
core_timing.Advance();
REQUIRE(MAX_SLICE_LENGTH == core_timing.GetDowncount());
REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong());
}
TEST_CASE("Core::Timing[PredictableLateness]", "[core]") {
ScopeInit guard;
auto& core_timing = guard.core_timing;
Core::Timing::EventType* cb_a = Core::Timing::RegisterEvent("callbackA", CallbackTemplate<0>);
Core::Timing::EventType* cb_b = Core::Timing::RegisterEvent("callbackB", CallbackTemplate<1>);
Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
// Enter slice 0
Core::Timing::Advance();
core_timing.Advance();
Core::Timing::ScheduleEvent(100, cb_a, CB_IDS[0]);
Core::Timing::ScheduleEvent(200, cb_b, CB_IDS[1]);
core_timing.ScheduleEvent(100, cb_a, CB_IDS[0]);
core_timing.ScheduleEvent(200, cb_b, CB_IDS[1]);
AdvanceAndCheck(0, 90, 10, -10); // (100 - 10)
AdvanceAndCheck(1, MAX_SLICE_LENGTH, 50, -50);
AdvanceAndCheck(core_timing, 0, 90, 10, -10); // (100 - 10)
AdvanceAndCheck(core_timing, 1, MAX_SLICE_LENGTH, 50, -50);
}
namespace ChainSchedulingTest {
static int reschedules = 0;
static void RescheduleCallback(u64 userdata, s64 cycles_late) {
static void RescheduleCallback(Core::Timing::CoreTiming& core_timing, u64 userdata,
s64 cycles_late) {
--reschedules;
REQUIRE(reschedules >= 0);
REQUIRE(lateness == cycles_late);
if (reschedules > 0) {
Core::Timing::ScheduleEvent(1000, reinterpret_cast<Core::Timing::EventType*>(userdata),
userdata);
core_timing.ScheduleEvent(1000, reinterpret_cast<Core::Timing::EventType*>(userdata),
userdata);
}
}
} // namespace ChainSchedulingTest
@@ -203,36 +209,39 @@ TEST_CASE("CoreTiming[ChainScheduling]", "[core]") {
using namespace ChainSchedulingTest;
ScopeInit guard;
auto& core_timing = guard.core_timing;
Core::Timing::EventType* cb_a = Core::Timing::RegisterEvent("callbackA", CallbackTemplate<0>);
Core::Timing::EventType* cb_b = Core::Timing::RegisterEvent("callbackB", CallbackTemplate<1>);
Core::Timing::EventType* cb_c = Core::Timing::RegisterEvent("callbackC", CallbackTemplate<2>);
Core::Timing::EventType* cb_rs =
Core::Timing::RegisterEvent("callbackReschedule", RescheduleCallback);
Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>);
Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>);
Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>);
Core::Timing::EventType* cb_rs = core_timing.RegisterEvent(
"callbackReschedule", [&core_timing](u64 userdata, s64 cycles_late) {
RescheduleCallback(core_timing, userdata, cycles_late);
});
// Enter slice 0
Core::Timing::Advance();
core_timing.Advance();
Core::Timing::ScheduleEvent(800, cb_a, CB_IDS[0]);
Core::Timing::ScheduleEvent(1000, cb_b, CB_IDS[1]);
Core::Timing::ScheduleEvent(2200, cb_c, CB_IDS[2]);
Core::Timing::ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs));
REQUIRE(800 == Core::Timing::GetDowncount());
core_timing.ScheduleEvent(800, cb_a, CB_IDS[0]);
core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]);
core_timing.ScheduleEvent(2200, cb_c, CB_IDS[2]);
core_timing.ScheduleEvent(1000, cb_rs, reinterpret_cast<u64>(cb_rs));
REQUIRE(800 == core_timing.GetDowncount());
reschedules = 3;
AdvanceAndCheck(0, 200); // cb_a
AdvanceAndCheck(1, 1000); // cb_b, cb_rs
AdvanceAndCheck(core_timing, 0, 200); // cb_a
AdvanceAndCheck(core_timing, 1, 1000); // cb_b, cb_rs
REQUIRE(2 == reschedules);
Core::Timing::AddTicks(Core::Timing::GetDowncount());
Core::Timing::Advance(); // cb_rs
core_timing.AddTicks(core_timing.GetDowncount());
core_timing.Advance(); // cb_rs
REQUIRE(1 == reschedules);
REQUIRE(200 == Core::Timing::GetDowncount());
REQUIRE(200 == core_timing.GetDowncount());
AdvanceAndCheck(2, 800); // cb_c
AdvanceAndCheck(core_timing, 2, 800); // cb_c
Core::Timing::AddTicks(Core::Timing::GetDowncount());
Core::Timing::Advance(); // cb_rs
core_timing.AddTicks(core_timing.GetDowncount());
core_timing.Advance(); // cb_rs
REQUIRE(0 == reschedules);
REQUIRE(MAX_SLICE_LENGTH == Core::Timing::GetDowncount());
REQUIRE(MAX_SLICE_LENGTH == core_timing.GetDowncount());
}

View File

@@ -74,6 +74,7 @@ add_library(video_core STATIC
shader/decode/hfma2.cpp
shader/decode/conversion.cpp
shader/decode/memory.cpp
shader/decode/texture.cpp
shader/decode/float_set_predicate.cpp
shader/decode/integer_set_predicate.cpp
shader/decode/half_set_predicate.cpp
@@ -101,6 +102,22 @@ add_library(video_core STATIC
video_core.h
)
if (ENABLE_VULKAN)
target_sources(video_core PRIVATE
renderer_vulkan/declarations.h
renderer_vulkan/vk_device.cpp
renderer_vulkan/vk_device.h
renderer_vulkan/vk_memory_manager.cpp
renderer_vulkan/vk_memory_manager.h
renderer_vulkan/vk_resource_manager.cpp
renderer_vulkan/vk_resource_manager.h
renderer_vulkan/vk_scheduler.cpp
renderer_vulkan/vk_scheduler.h)
target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)
endif()
create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)

View File

@@ -33,18 +33,36 @@ void DmaPusher::DispatchCalls() {
}
bool DmaPusher::Step() {
if (dma_get != dma_put) {
// Push buffer non-empty, read a word
const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
ASSERT_MSG(address, "Invalid GPU address");
if (!ib_enable || dma_pushbuffer.empty()) {
// pushbuffer empty and IB empty or nonexistent - nothing to do
return false;
}
const CommandHeader command_header{Memory::Read32(*address)};
const CommandList& command_list{dma_pushbuffer.front()};
const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
GPUVAddr dma_get = command_list_header.addr;
GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
bool non_main = command_list_header.is_non_main;
dma_get += sizeof(u32);
if (dma_pushbuffer_subindex >= command_list.size()) {
// We've gone through the current list, remove it from the queue
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
}
if (!non_main) {
dma_mget = dma_get;
}
if (command_list_header.size == 0) {
return true;
}
// Push buffer non-empty, read a word
const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
ASSERT_MSG(address, "Invalid GPU address");
command_headers.resize(command_list_header.size);
Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32));
for (const CommandHeader& command_header : command_headers) {
// now, see if we're in the middle of a command
if (dma_state.length_pending) {
@@ -91,22 +109,11 @@ bool DmaPusher::Step() {
break;
}
}
} else if (ib_enable && !dma_pushbuffer.empty()) {
// Current pushbuffer empty, but we have more IB entries to read
const CommandList& command_list{dma_pushbuffer.front()};
const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
dma_get = command_list_header.addr;
dma_put = dma_get + command_list_header.size * sizeof(u32);
non_main = command_list_header.is_non_main;
}
if (dma_pushbuffer_subindex >= command_list.size()) {
// We've gone through the current list, remove it from the queue
dma_pushbuffer.pop();
dma_pushbuffer_subindex = 0;
}
} else {
// Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
return {};
if (!non_main) {
// TODO (degasus): This is dead code, as dma_mget is never read.
dma_mget = dma_put;
}
return true;

View File

@@ -75,6 +75,8 @@ private:
GPU& gpu;
std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once
std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer
@@ -89,11 +91,8 @@ private:
DmaState dma_state{};
bool dma_increment_once{};
GPUVAddr dma_put{}; ///< pushbuffer current end address
GPUVAddr dma_get{}; ///< pushbuffer current read address
GPUVAddr dma_mget{}; ///< main pushbuffer last read address
bool ib_enable{true}; ///< IB mode enabled
bool non_main{}; ///< non-main pushbuffer active
};
} // namespace Tegra

View File

@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/memory.h"
@@ -11,9 +12,9 @@
namespace Tegra::Engines {
KeplerMemory::KeplerMemory(VideoCore::RasterizerInterface& rasterizer,
KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
: memory_manager(memory_manager), rasterizer{rasterizer} {}
: system{system}, memory_manager(memory_manager), rasterizer{rasterizer} {}
KeplerMemory::~KeplerMemory() = default;
@@ -50,7 +51,7 @@ void KeplerMemory::ProcessData(u32 data) {
rasterizer.InvalidateRegion(*dest_address, sizeof(u32));
Memory::Write32(*dest_address, data);
Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
state.write_offset++;
}

View File

@@ -5,13 +5,16 @@
#pragma once
#include <array>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace Core {
class System;
}
namespace VideoCore {
class RasterizerInterface;
}
@@ -23,7 +26,8 @@ namespace Tegra::Engines {
class KeplerMemory final {
public:
KeplerMemory(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager);
~KeplerMemory();
/// Write the value to the register identified by method.
@@ -76,6 +80,7 @@ public:
} state{};
private:
Core::System& system;
MemoryManager& memory_manager;
VideoCore::RasterizerInterface& rasterizer;

View File

@@ -19,8 +19,10 @@ namespace Tegra::Engines {
/// First register id that is actually a Macro call.
constexpr u32 MacroRegistersStart = 0xE00;
Maxwell3D::Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
: memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) {
Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
: memory_manager(memory_manager), system{system}, rasterizer{rasterizer},
macro_interpreter(*this) {
InitializeRegisterDefaults();
}
@@ -103,7 +105,7 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
}
void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
auto debug_context = system.GetGPUDebugContext();
// It is an error to write to a register other than the current macro's ARG register before it
// has finished execution.
@@ -317,7 +319,7 @@ void Maxwell3D::ProcessQueryGet() {
LongQueryResult query_result{};
query_result.value = result;
// TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
query_result.timestamp = Core::Timing::GetTicks();
query_result.timestamp = system.CoreTiming().GetTicks();
Memory::WriteBlock(*address, &query_result, sizeof(query_result));
}
dirty_flags.OnMemoryWrite();
@@ -334,7 +336,7 @@ void Maxwell3D::DrawArrays() {
regs.vertex_buffer.count);
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
auto debug_context = system.GetGPUDebugContext();
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);

View File

@@ -17,6 +17,10 @@
#include "video_core/memory_manager.h"
#include "video_core/textures/texture.h"
namespace Core {
class System;
}
namespace VideoCore {
class RasterizerInterface;
}
@@ -28,7 +32,8 @@ namespace Tegra::Engines {
class Maxwell3D final {
public:
explicit Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager);
~Maxwell3D() = default;
/// Register structure of the Maxwell3D engine.
@@ -1131,6 +1136,8 @@ public:
private:
void InitializeRegisterDefaults();
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
/// Start offsets of each macro in macro_memory

View File

@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/maxwell_3d.h"
@@ -11,8 +12,9 @@
namespace Tegra::Engines {
MaxwellDMA::MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
: memory_manager(memory_manager), rasterizer{rasterizer} {}
MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
: memory_manager(memory_manager), system{system}, rasterizer{rasterizer} {}
void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
ASSERT_MSG(method_call.method < Regs::NUM_REGS,
@@ -59,7 +61,7 @@ void MaxwellDMA::HandleCopy() {
}
// All copies here update the main memory, so mark all rasterizer states as invalid.
Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
// When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D

View File

@@ -5,13 +5,16 @@
#pragma once
#include <array>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
namespace Core {
class System;
}
namespace VideoCore {
class RasterizerInterface;
}
@@ -20,7 +23,8 @@ namespace Tegra::Engines {
class MaxwellDMA final {
public:
explicit MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager);
~MaxwellDMA() = default;
/// Write the value to the register identified by method.
@@ -137,6 +141,8 @@ public:
MemoryManager& memory_manager;
private:
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
/// Performs the copy from the source buffer to the destination buffer as configured in the

View File

@@ -325,11 +325,11 @@ enum class TextureQueryType : u64 {
enum class TextureProcessMode : u64 {
None = 0,
LZ = 1, // Unknown, appears to be the same as none.
LZ = 1, // Load LOD of zero.
LB = 2, // Load Bias.
LL = 3, // Load LOD (LevelOfDetail)
LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB
LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL
LL = 3, // Load LOD.
LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB.
LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL.
};
enum class TextureMiscMode : u64 {
@@ -376,9 +376,9 @@ enum class R2pMode : u64 {
};
enum class IpaInterpMode : u64 {
Linear = 0,
Perspective = 1,
Flat = 2,
Pass = 0,
Multiply = 1,
Constant = 2,
Sc = 3,
};
@@ -1446,6 +1446,7 @@ public:
Flow,
Synch,
Memory,
Texture,
FloatSet,
FloatSetPredicate,
IntegerSet,
@@ -1576,14 +1577,14 @@ private:
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"),
INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"),
INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"),
INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"),
INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"),
INST("1101111101011---", Id::TMML, Type::Memory, "TMML"),
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),

View File

@@ -16,6 +16,13 @@ enum class OutputTopology : u32 {
TriangleStrip = 7,
};
enum class AttributeUse : u8 {
Unused = 0,
Constant = 1,
Perspective = 2,
ScreenLinear = 3,
};
// Documentation in:
// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
struct Header {
@@ -84,9 +91,15 @@ struct Header {
} vtg;
struct {
INSERT_PADDING_BYTES(3); // ImapSystemValuesA
INSERT_PADDING_BYTES(1); // ImapSystemValuesB
INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
INSERT_PADDING_BYTES(3); // ImapSystemValuesA
INSERT_PADDING_BYTES(1); // ImapSystemValuesB
union {
BitField<0, 2, AttributeUse> x;
BitField<2, 2, AttributeUse> y;
BitField<4, 2, AttributeUse> w;
BitField<6, 2, AttributeUse> z;
u8 raw;
} imap_generic_vector[32];
INSERT_PADDING_BYTES(2); // ImapColor
INSERT_PADDING_BYTES(2); // ImapSystemValuesC
INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
@@ -103,6 +116,28 @@ struct Header {
const u32 bit = render_target * 4 + component;
return omap.target & (1 << bit);
}
AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
return static_cast<AttributeUse>(
(imap_generic_vector[attribute].raw >> (index * 2)) & 0x03);
}
AttributeUse GetAttributeUse(u32 attribute) const {
AttributeUse result = AttributeUse::Unused;
for (u32 i = 0; i < 4; i++) {
const auto index = GetAttributeIndexUse(attribute, i);
if (index == AttributeUse::Unused) {
continue;
}
if (result == AttributeUse::Unused || result == index) {
result = index;
continue;
}
LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
if (index == AttributeUse::Perspective) {
result = index;
}
}
return result;
}
} ps;
};
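To make the packed layout concrete, here is a small, self-contained recomputation of what GetAttributeIndexUse extracts (illustrative only; component i of a generic vector lives in bits [2*i+1 : 2*i] of the byte, using the AttributeUse encoding declared above):

#include <cstdint>
#include <cstdio>

enum class AttributeUse : std::uint8_t { Unused = 0, Constant = 1, Perspective = 2, ScreenLinear = 3 };

int main() {
    // Example byte: components 0..2 use Perspective (2), component 3 is Unused (0).
    const std::uint8_t raw = (2 << 0) | (2 << 2) | (2 << 4) | (0 << 6);
    for (unsigned i = 0; i < 4; ++i) {
        const auto use = static_cast<AttributeUse>((raw >> (i * 2)) & 0x03);
        std::printf("component %u -> %u\n", i, static_cast<unsigned>(use));
    }
    // GetAttributeUse() would report Perspective for this attribute: Unused components are
    // skipped and all remaining components agree, so no conflict is logged.
    return 0;
}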

View File

@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/memory.h"
#include "video_core/engines/fermi_2d.h"
@@ -18,7 +19,6 @@ namespace Tegra {
u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
switch (format) {
case PixelFormat::ABGR8:
case PixelFormat::RGB565:
case PixelFormat::BGRA8:
return 4;
default:
@@ -28,14 +28,14 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
UNREACHABLE();
}
GPU::GPU(VideoCore::RasterizerInterface& rasterizer) {
GPU::GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer) {
memory_manager = std::make_unique<Tegra::MemoryManager>();
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager);
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager);
}
GPU::~GPU() = default;
@@ -284,7 +284,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {
block.sequence = regs.semaphore_sequence;
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming
block.timestamp = Core::Timing::GetTicks();
block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
Memory::WriteBlock(*address, &block, sizeof(block));
} else {
const auto address =

View File

@@ -6,12 +6,15 @@
#include <array>
#include <memory>
#include <vector>
#include "common/common_types.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
#include "video_core/dma_pusher.h"
#include "video_core/memory_manager.h"
namespace Core {
class System;
}
namespace VideoCore {
class RasterizerInterface;
}
@@ -80,7 +83,6 @@ class DebugContext;
struct FramebufferConfig {
enum class PixelFormat : u32 {
ABGR8 = 1,
RGB565 = 4,
BGRA8 = 5,
};
@@ -119,7 +121,7 @@ enum class EngineID {
class GPU final {
public:
explicit GPU(VideoCore::RasterizerInterface& rasterizer);
explicit GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
~GPU();
struct MethodCall {

View File

@@ -423,7 +423,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
for (u32 i = 0; i < params.depth; i++) {
MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
params.MipBlockDepth(mip_level), params.tile_width_spacing, 1,
params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
gl_buffer.data() + offset_gl, gl_size, params.addr + offset);
offset += layer_size;
offset_gl += gl_size;
@@ -1257,7 +1257,11 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
case SurfaceTarget::TextureCubemap:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubeArray:
FastLayeredCopySurface(old_surface, new_surface);
if (old_params.pixel_format == new_params.pixel_format) {
FastLayeredCopySurface(old_surface, new_surface);
} else {
AccurateCopySurface(old_surface, new_surface);
}
break;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",

View File

@@ -36,7 +36,6 @@ using PixelFormat = VideoCore::Surface::PixelFormat;
using ComponentType = VideoCore::Surface::ComponentType;
struct SurfaceParams {
enum class SurfaceClass {
Uploaded,
RenderTarget,
@@ -169,20 +168,27 @@ struct SurfaceParams {
}
u32 MipBlockDepth(u32 mip_level) const {
if (mip_level == 0)
if (mip_level == 0) {
return block_depth;
if (is_layered)
}
if (is_layered) {
return 1;
u32 depth = MipDepth(mip_level);
}
const u32 mip_depth = MipDepth(mip_level);
u32 bd = 32;
while (bd > 1 && depth * 2 <= bd) {
while (bd > 1 && mip_depth * 2 <= bd) {
bd >>= 1;
}
if (bd == 32) {
u32 bh = MipBlockHeight(mip_level);
if (bh >= 4)
const u32 bh = MipBlockHeight(mip_level);
if (bh >= 4) {
return 16;
}
}
return bd;
}
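Worked example for the rewritten MipBlockDepth: for a non-layered surface at mip_level > 0 with MipDepth(mip_level) = 8, bd starts at 32 and halves while mip_depth * 2 <= bd, going 32 -> 16 -> 8 before the loop stops; since bd is no longer 32 the MipBlockHeight fallback is skipped and 8 is returned. Only when the loop never runs (mip_depth * 2 > 32) does the function consult MipBlockHeight, returning 16 if that is at least 4 and 32 otherwise.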

View File

@@ -5,7 +5,9 @@
#include <array>
#include <string>
#include <string_view>
#include <utility>
#include <variant>
#include <vector>
#include <fmt/format.h>
@@ -20,6 +22,7 @@
namespace OpenGL::GLShader {
using Tegra::Shader::Attribute;
using Tegra::Shader::AttributeUse;
using Tegra::Shader::Header;
using Tegra::Shader::IpaInterpMode;
using Tegra::Shader::IpaMode;
@@ -288,34 +291,22 @@ private:
code.AddNewLine();
}
std::string GetInputFlags(const IpaMode& input_mode) {
const IpaSampleMode sample_mode = input_mode.sampling_mode;
const IpaInterpMode interp_mode = input_mode.interpolation_mode;
std::string GetInputFlags(AttributeUse attribute) {
std::string out;
switch (interp_mode) {
case IpaInterpMode::Flat:
switch (attribute) {
case AttributeUse::Constant:
out += "flat ";
break;
case IpaInterpMode::Linear:
case AttributeUse::ScreenLinear:
out += "noperspective ";
break;
case IpaInterpMode::Perspective:
case AttributeUse::Perspective:
// Default, Smooth
break;
default:
UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode));
}
switch (sample_mode) {
case IpaSampleMode::Centroid:
// It can be implemented with the "centroid " keyword in GLSL
UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid");
break;
case IpaSampleMode::Default:
// Default, n/a
break;
default:
UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode));
LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
UNREACHABLE();
}
return out;
}
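Put together with the declaration loop below, this means a fragment-shader generic input now carries a qualifier derived from the shader header rather than from per-instruction IPA modes. Hypothetical emitted declarations (attribute names and locations are illustrative, following the code.AddLine construction below) would look like:

layout (location = 0) flat in vec4 frg_attr0;           // AttributeUse::Constant
layout (location = 1) noperspective in vec4 frg_attr1;  // AttributeUse::ScreenLinear
layout (location = 2) in vec4 frg_attr2;                // AttributeUse::Perspective (default smooth interpolation)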
@@ -324,16 +315,11 @@ private:
const auto& attributes = ir.GetInputAttributes();
for (const auto element : attributes) {
const Attribute::Index index = element.first;
const IpaMode& input_mode = *element.second.begin();
if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
// Skip when it's not a generic attribute
continue;
}
ASSERT(element.second.size() > 0);
UNIMPLEMENTED_IF_MSG(element.second.size() > 1,
"Multiple input flag modes are not supported in GLSL");
// TODO(bunnei): Use proper number of elements for these
u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
if (stage != ShaderStage::Vertex) {
@@ -345,8 +331,14 @@ private:
if (stage == ShaderStage::Geometry) {
attr = "gs_" + attr + "[]";
}
code.AddLine("layout (location = " + std::to_string(idx) + ") " +
GetInputFlags(input_mode) + "in vec4 " + attr + ';');
std::string suffix;
if (stage == ShaderStage::Fragment) {
const auto input_mode =
header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION);
suffix = GetInputFlags(input_mode);
}
code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " +
attr + ';');
}
if (!attributes.empty())
code.AddNewLine();
@@ -616,17 +608,8 @@ private:
std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
std::string value = VisitOperand(operation, operand_index);
switch (type) {
case Type::Bool:
case Type::Bool2:
case Type::Float:
return value;
case Type::Int:
return "ftoi(" + value + ')';
case Type::Uint:
return "ftou(" + value + ')';
case Type::HalfFloat:
case Type::HalfFloat: {
const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta());
if (!half_meta) {
value = "toHalf2(" + value + ')';
@@ -643,6 +626,26 @@ private:
return "vec2(toHalf2(" + value + ")[1])";
}
}
default:
return CastOperand(value, type);
}
}
std::string CastOperand(const std::string& value, Type type) const {
switch (type) {
case Type::Bool:
case Type::Bool2:
case Type::Float:
return value;
case Type::Int:
return "ftoi(" + value + ')';
case Type::Uint:
return "ftou(" + value + ')';
case Type::HalfFloat:
// Can't be handled as a stand-alone value
UNREACHABLE();
return value;
}
UNREACHABLE();
return value;
}
@@ -650,6 +653,7 @@ private:
std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) {
switch (type) {
case Type::Bool:
case Type::Bool2:
case Type::Float:
if (needs_parenthesis) {
return '(' + value + ')';
@@ -715,13 +719,13 @@ private:
}
std::string GenerateTexture(Operation operation, const std::string& func,
bool is_extra_int = false) {
const std::vector<std::pair<Type, Node>>& extras) {
constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
const auto count = static_cast<u32>(operation.GetOperandsCount());
const std::size_t count = operation.GetOperandsCount();
const bool has_array = meta->sampler.IsArray();
const bool has_shadow = meta->sampler.IsShadow();
@@ -732,40 +736,51 @@ private:
expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
expr += '(';
for (u32 i = 0; i < count; ++i) {
for (std::size_t i = 0; i < count; ++i) {
expr += Visit(operation[i]);
const u32 next = i + 1;
if (next < count || has_array || has_shadow)
const std::size_t next = i + 1;
if (next < count)
expr += ", ";
}
if (has_array) {
expr += "float(ftoi(" + Visit(meta->array) + "))";
expr += ", float(ftoi(" + Visit(meta->array) + "))";
}
if (has_shadow) {
if (has_array)
expr += ", ";
expr += Visit(meta->depth_compare);
expr += ", " + Visit(meta->depth_compare);
}
expr += ')';
for (const Node extra : meta->extras) {
for (const auto& extra_pair : extras) {
const auto [type, operand] = extra_pair;
if (operand == nullptr) {
continue;
}
expr += ", ";
if (is_extra_int) {
if (const auto immediate = std::get_if<ImmediateNode>(extra)) {
switch (type) {
case Type::Int:
if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
// Inline the string as an immediate integer in GLSL (some extra arguments are
// required to be constant)
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
} else {
expr += "ftoi(" + Visit(extra) + ')';
expr += "ftoi(" + Visit(operand) + ')';
}
} else {
expr += Visit(extra);
break;
case Type::Float:
expr += Visit(operand);
break;
default: {
const auto type_int = static_cast<u32>(type);
UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
expr += '0';
break;
}
}
}
expr += ')';
return expr;
return expr + ')';
}
std::string Assign(Operation operation) {
@@ -1144,7 +1159,7 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
std::string expr = GenerateTexture(operation, "texture");
std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}});
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
@@ -1155,7 +1170,7 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
std::string expr = GenerateTexture(operation, "textureLod");
std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}});
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
@@ -1166,7 +1181,8 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
return GenerateTexture(operation, "textureGather", !meta->sampler.IsShadow()) +
const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
return GenerateTexture(operation, "textureGather", {{type, meta->component}}) +
GetSwizzle(meta->element);
}
@@ -1195,8 +1211,8 @@ private:
ASSERT(meta);
if (meta->element < 2) {
return "itof(int((" + GenerateTexture(operation, "textureQueryLod") + " * vec2(256))" +
GetSwizzle(meta->element) + "))";
return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) +
" * vec2(256))" + GetSwizzle(meta->element) + "))";
}
return "0";
}
@@ -1206,25 +1222,26 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
UNIMPLEMENTED_IF(meta->sampler.IsArray());
UNIMPLEMENTED_IF(!meta->extras.empty());
const auto count = static_cast<u32>(operation.GetOperandsCount());
const std::size_t count = operation.GetOperandsCount();
std::string expr = "texelFetch(";
expr += GetSampler(meta->sampler);
expr += ", ";
expr += constructors.at(count - 1);
expr += constructors.at(operation.GetOperandsCount() - 1);
expr += '(';
for (u32 i = 0; i < count; ++i) {
for (std::size_t i = 0; i < count; ++i) {
expr += VisitOperand(operation, i, Type::Int);
const u32 next = i + 1;
const std::size_t next = i + 1;
if (next == count)
expr += ')';
if (next < count)
else if (next < count)
expr += ", ";
}
if (meta->lod) {
expr += ", ";
expr += CastOperand(Visit(meta->lod), Type::Int);
}
expr += ')';
return expr + GetSwizzle(meta->element);
@@ -1571,4 +1588,4 @@ ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const st
return {decompiler.GetResult(), decompiler.GetShaderEntries()};
}
} // namespace OpenGL::GLShader
} // namespace OpenGL::GLShader

View File

@@ -124,7 +124,7 @@ layout (location = 5) out vec4 FragColor5;
layout (location = 6) out vec4 FragColor6;
layout (location = 7) out vec4 FragColor7;
layout (location = 0) in vec4 position;
layout (location = 0) in noperspective vec4 position;
layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
vec4 viewport_flip;
@@ -172,4 +172,4 @@ void main() {
return {out, program.second};
}
} // namespace OpenGL::GLShader
} // namespace OpenGL::GLShader

View File

@@ -11,7 +11,9 @@
namespace OpenGL {
OpenGLState OpenGLState::cur_state;
bool OpenGLState::s_rgb_used;
OpenGLState::OpenGLState() {
// These all match default OpenGL values
geometry_shaders.enabled = false;
@@ -112,7 +114,6 @@ void OpenGLState::ApplyDefaultState() {
}
void OpenGLState::ApplySRgb() const {
// sRGB
if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) {
if (framebuffer_srgb.enabled) {
// Track if sRGB is used
@@ -125,23 +126,20 @@ void OpenGLState::ApplySRgb() const {
}
void OpenGLState::ApplyCulling() const {
// Culling
const bool cull_changed = cull.enabled != cur_state.cull.enabled;
if (cull_changed) {
if (cull.enabled != cur_state.cull.enabled) {
if (cull.enabled) {
glEnable(GL_CULL_FACE);
} else {
glDisable(GL_CULL_FACE);
}
}
if (cull.enabled) {
if (cull_changed || cull.mode != cur_state.cull.mode) {
glCullFace(cull.mode);
}
if (cull_changed || cull.front_face != cur_state.cull.front_face) {
glFrontFace(cull.front_face);
}
if (cull.mode != cur_state.cull.mode) {
glCullFace(cull.mode);
}
if (cull.front_face != cur_state.cull.front_face) {
glFrontFace(cull.front_face);
}
}
@@ -172,72 +170,63 @@ void OpenGLState::ApplyColorMask() const {
}
void OpenGLState::ApplyDepth() const {
// Depth test
const bool depth_test_changed = depth.test_enabled != cur_state.depth.test_enabled;
if (depth_test_changed) {
if (depth.test_enabled != cur_state.depth.test_enabled) {
if (depth.test_enabled) {
glEnable(GL_DEPTH_TEST);
} else {
glDisable(GL_DEPTH_TEST);
}
}
if (depth.test_enabled &&
(depth_test_changed || depth.test_func != cur_state.depth.test_func)) {
if (depth.test_func != cur_state.depth.test_func) {
glDepthFunc(depth.test_func);
}
// Depth mask
if (depth.write_mask != cur_state.depth.write_mask) {
glDepthMask(depth.write_mask);
}
}
void OpenGLState::ApplyPrimitiveRestart() const {
const bool primitive_restart_changed =
primitive_restart.enabled != cur_state.primitive_restart.enabled;
if (primitive_restart_changed) {
if (primitive_restart.enabled != cur_state.primitive_restart.enabled) {
if (primitive_restart.enabled) {
glEnable(GL_PRIMITIVE_RESTART);
} else {
glDisable(GL_PRIMITIVE_RESTART);
}
}
if (primitive_restart_changed ||
(primitive_restart.enabled &&
primitive_restart.index != cur_state.primitive_restart.index)) {
if (primitive_restart.index != cur_state.primitive_restart.index) {
glPrimitiveRestartIndex(primitive_restart.index);
}
}
void OpenGLState::ApplyStencilTest() const {
const bool stencil_test_changed = stencil.test_enabled != cur_state.stencil.test_enabled;
if (stencil_test_changed) {
if (stencil.test_enabled != cur_state.stencil.test_enabled) {
if (stencil.test_enabled) {
glEnable(GL_STENCIL_TEST);
} else {
glDisable(GL_STENCIL_TEST);
}
}
if (stencil.test_enabled) {
auto config_stencil = [stencil_test_changed](GLenum face, const auto& config,
const auto& prev_config) {
if (stencil_test_changed || config.test_func != prev_config.test_func ||
config.test_ref != prev_config.test_ref ||
config.test_mask != prev_config.test_mask) {
glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
}
if (stencil_test_changed || config.action_depth_fail != prev_config.action_depth_fail ||
config.action_depth_pass != prev_config.action_depth_pass ||
config.action_stencil_fail != prev_config.action_stencil_fail) {
glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
config.action_depth_pass);
}
if (config.write_mask != prev_config.write_mask) {
glStencilMaskSeparate(face, config.write_mask);
}
};
config_stencil(GL_FRONT, stencil.front, cur_state.stencil.front);
config_stencil(GL_BACK, stencil.back, cur_state.stencil.back);
}
const auto ConfigStencil = [](GLenum face, const auto& config, const auto& prev_config) {
if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref ||
config.test_mask != prev_config.test_mask) {
glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
}
if (config.action_depth_fail != prev_config.action_depth_fail ||
config.action_depth_pass != prev_config.action_depth_pass ||
config.action_stencil_fail != prev_config.action_stencil_fail) {
glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
config.action_depth_pass);
}
if (config.write_mask != prev_config.write_mask) {
glStencilMaskSeparate(face, config.write_mask);
}
};
ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front);
ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
}
// Viewport does not affect glClearBuffer so emulate viewport using scissor test
void OpenGLState::EmulateViewportWithScissor() {
@@ -278,19 +267,18 @@ void OpenGLState::ApplyViewport() const {
updated.depth_range_far != current.depth_range_far) {
glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
}
const bool scissor_changed = updated.scissor.enabled != current.scissor.enabled;
if (scissor_changed) {
if (updated.scissor.enabled != current.scissor.enabled) {
if (updated.scissor.enabled) {
glEnablei(GL_SCISSOR_TEST, i);
} else {
glDisablei(GL_SCISSOR_TEST, i);
}
}
if (updated.scissor.enabled &&
(scissor_changed || updated.scissor.x != current.scissor.x ||
updated.scissor.y != current.scissor.y ||
updated.scissor.width != current.scissor.width ||
updated.scissor.height != current.scissor.height)) {
if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
updated.scissor.width != current.scissor.width ||
updated.scissor.height != current.scissor.height) {
glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
updated.scissor.height);
}
@@ -302,22 +290,23 @@ void OpenGLState::ApplyViewport() const {
updated.height != current.height) {
glViewport(updated.x, updated.y, updated.width, updated.height);
}
if (updated.depth_range_near != current.depth_range_near ||
updated.depth_range_far != current.depth_range_far) {
glDepthRange(updated.depth_range_near, updated.depth_range_far);
}
const bool scissor_changed = updated.scissor.enabled != current.scissor.enabled;
if (scissor_changed) {
if (updated.scissor.enabled != current.scissor.enabled) {
if (updated.scissor.enabled) {
glEnable(GL_SCISSOR_TEST);
} else {
glDisable(GL_SCISSOR_TEST);
}
}
if (updated.scissor.enabled && (scissor_changed || updated.scissor.x != current.scissor.x ||
updated.scissor.y != current.scissor.y ||
updated.scissor.width != current.scissor.width ||
updated.scissor.height != current.scissor.height)) {
if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
updated.scissor.width != current.scissor.width ||
updated.scissor.height != current.scissor.height) {
glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width,
updated.scissor.height);
}
@@ -327,8 +316,7 @@ void OpenGLState::ApplyViewport() const {
void OpenGLState::ApplyGlobalBlending() const {
const Blend& current = cur_state.blend[0];
const Blend& updated = blend[0];
const bool blend_changed = updated.enabled != current.enabled;
if (blend_changed) {
if (updated.enabled != current.enabled) {
if (updated.enabled) {
glEnable(GL_BLEND);
} else {
@@ -338,15 +326,14 @@ void OpenGLState::ApplyGlobalBlending() const {
if (!updated.enabled) {
return;
}
if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
if (updated.src_rgb_func != current.src_rgb_func ||
updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
updated.dst_a_func != current.dst_a_func) {
glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
updated.dst_a_func);
}
if (blend_changed || updated.rgb_equation != current.rgb_equation ||
updated.a_equation != current.a_equation) {
if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
}
}
@@ -354,26 +341,22 @@ void OpenGLState::ApplyGlobalBlending() const {
void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
const Blend& updated = blend[target];
const Blend& current = cur_state.blend[target];
const bool blend_changed = updated.enabled != current.enabled || force;
if (blend_changed) {
if (updated.enabled != current.enabled || force) {
if (updated.enabled) {
glEnablei(GL_BLEND, static_cast<GLuint>(target));
} else {
glDisablei(GL_BLEND, static_cast<GLuint>(target));
}
}
if (!updated.enabled) {
return;
}
if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
if (updated.src_rgb_func != current.src_rgb_func ||
updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
updated.dst_a_func != current.dst_a_func) {
glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
}
if (blend_changed || updated.rgb_equation != current.rgb_equation ||
updated.a_equation != current.a_equation) {
if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
updated.a_equation);
}
@@ -397,8 +380,7 @@ void OpenGLState::ApplyBlending() const {
}
void OpenGLState::ApplyLogicOp() const {
const bool logic_op_changed = logic_op.enabled != cur_state.logic_op.enabled;
if (logic_op_changed) {
if (logic_op.enabled != cur_state.logic_op.enabled) {
if (logic_op.enabled) {
glEnable(GL_COLOR_LOGIC_OP);
} else {
@@ -406,14 +388,12 @@ void OpenGLState::ApplyLogicOp() const {
}
}
if (logic_op.enabled &&
(logic_op_changed || logic_op.operation != cur_state.logic_op.operation)) {
if (logic_op.operation != cur_state.logic_op.operation) {
glLogicOp(logic_op.operation);
}
}
void OpenGLState::ApplyPolygonOffset() const {
const bool fill_enable_changed =
polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable;
const bool line_enable_changed =
@@ -448,9 +428,7 @@ void OpenGLState::ApplyPolygonOffset() const {
}
}
if ((polygon_offset.fill_enable || polygon_offset.line_enable || polygon_offset.point_enable) &&
(factor_changed || units_changed || clamp_changed)) {
if (factor_changed || units_changed || clamp_changed) {
if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
} else {
@@ -528,9 +506,9 @@ void OpenGLState::ApplyDepthClamp() const {
depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
return;
}
if (depth_clamp.far_plane != depth_clamp.near_plane) {
UNIMPLEMENTED_MSG("Unimplemented Depth Clamp Separation!");
}
UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
"Unimplemented Depth Clamp Separation!");
if (depth_clamp.far_plane || depth_clamp.near_plane) {
glEnable(GL_DEPTH_CLAMP);
} else {

View File

@@ -137,7 +137,7 @@ void RendererOpenGL::SwapBuffers(
render_window.PollEvents();
system.FrameLimiter().DoFrameLimiting(Core::Timing::GetGlobalTimeUs());
system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs());
system.GetPerfStats().BeginSystemFrame();
// Restore the rasterizer state
@@ -266,12 +266,6 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
gl_framebuffer_data.resize(texture.width * texture.height * 4);
break;
case Tegra::FramebufferConfig::PixelFormat::RGB565:
internal_format = GL_RGB;
texture.gl_format = GL_RGB;
texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
gl_framebuffer_data.resize(texture.width * texture.height * 4);
break;
default:
internal_format = GL_RGBA8;
texture.gl_format = GL_RGBA;
@@ -386,7 +380,8 @@ void RendererOpenGL::CaptureScreenshot() {
GLuint renderbuffer;
glGenRenderbuffers(1, &renderbuffer);
glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
glRenderbufferStorage(GL_RENDERBUFFER, GL_RGB8, layout.width, layout.height);
glRenderbufferStorage(GL_RENDERBUFFER, state.GetsRGBUsed() ? GL_SRGB8 : GL_RGB8, layout.width,
layout.height);
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer);
DrawScreen(layout);

View File

@@ -0,0 +1,45 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vulkan/vulkan.hpp>
namespace Vulkan {
// vulkan.hpp unique handles use DispatchLoaderStatic by default
template <typename T>
using UniqueHandle = vk::UniqueHandle<T, vk::DispatchLoaderDynamic>;
using UniqueAccelerationStructureNV = UniqueHandle<vk::AccelerationStructureNV>;
using UniqueBuffer = UniqueHandle<vk::Buffer>;
using UniqueBufferView = UniqueHandle<vk::BufferView>;
using UniqueCommandBuffer = UniqueHandle<vk::CommandBuffer>;
using UniqueCommandPool = UniqueHandle<vk::CommandPool>;
using UniqueDescriptorPool = UniqueHandle<vk::DescriptorPool>;
using UniqueDescriptorSet = UniqueHandle<vk::DescriptorSet>;
using UniqueDescriptorSetLayout = UniqueHandle<vk::DescriptorSetLayout>;
using UniqueDescriptorUpdateTemplate = UniqueHandle<vk::DescriptorUpdateTemplate>;
using UniqueDevice = UniqueHandle<vk::Device>;
using UniqueDeviceMemory = UniqueHandle<vk::DeviceMemory>;
using UniqueEvent = UniqueHandle<vk::Event>;
using UniqueFence = UniqueHandle<vk::Fence>;
using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>;
using UniqueImage = UniqueHandle<vk::Image>;
using UniqueImageView = UniqueHandle<vk::ImageView>;
using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>;
using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>;
using UniquePipeline = UniqueHandle<vk::Pipeline>;
using UniquePipelineCache = UniqueHandle<vk::PipelineCache>;
using UniquePipelineLayout = UniqueHandle<vk::PipelineLayout>;
using UniqueQueryPool = UniqueHandle<vk::QueryPool>;
using UniqueRenderPass = UniqueHandle<vk::RenderPass>;
using UniqueSampler = UniqueHandle<vk::Sampler>;
using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>;
using UniqueSemaphore = UniqueHandle<vk::Semaphore>;
using UniqueShaderModule = UniqueHandle<vk::ShaderModule>;
using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>;
using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>;
} // namespace Vulkan
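Why the aliases exist: vulkan.hpp's unique handles default to the static dispatcher, so a handle that must be destroyed through runtime-loaded function pointers needs the dynamic loader instead. A hedged sketch mirroring the VKDevice::Create code later in this change set (the raw device and loader are assumed to be created and initialized elsewhere):

vk::DispatchLoaderDynamic dld; // assumed initialized, e.g. dld.init(instance, vkGetInstanceProcAddr, device, vkGetDeviceProcAddr)
vk::Device raw_device;         // assumed obtained from vk::PhysicalDevice::createDevice
UniqueDevice device(raw_device,
                    vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));
// From here on, vulkan.hpp calls made through device.get() should pass dld explicitly as the dispatcher.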

View File

@@ -0,0 +1,231 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <map>
#include <optional>
#include <set>
#include <vector>
#include "common/assert.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
namespace Vulkan {
namespace Alternatives {
constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = {
vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}};
constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = {
vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}};
} // namespace Alternatives
constexpr const vk::Format* GetFormatAlternatives(vk::Format format) {
switch (format) {
case vk::Format::eD24UnormS8Uint:
return Alternatives::Depth24UnormS8Uint.data();
case vk::Format::eD16UnormS8Uint:
return Alternatives::Depth16UnormS8Uint.data();
default:
return nullptr;
}
}
constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties,
FormatType format_type) {
switch (format_type) {
case FormatType::Linear:
return properties.linearTilingFeatures;
case FormatType::Optimal:
return properties.optimalTilingFeatures;
case FormatType::Buffer:
return properties.bufferFeatures;
default:
return {};
}
}
VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface)
: physical{physical}, format_properties{GetFormatProperties(dldi, physical)} {
SetupFamilies(dldi, surface);
SetupProperties(dldi);
}
VKDevice::~VKDevice() = default;
bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) {
const auto queue_cis = GetDeviceQueueCreateInfos();
vk::PhysicalDeviceFeatures device_features{};
const std::vector<const char*> extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME};
const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(),
0, nullptr, static_cast<u32>(extensions.size()),
extensions.data(), &device_features);
vk::Device dummy_logical;
if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!");
return false;
}
dld.init(instance, dldi.vkGetInstanceProcAddr, dummy_logical, dldi.vkGetDeviceProcAddr);
logical = UniqueDevice(
dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));
graphics_queue = logical->getQueue(graphics_family, 0, dld);
present_queue = logical->getQueue(present_family, 0, dld);
return true;
}
vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
vk::FormatFeatureFlags wanted_usage,
FormatType format_type) const {
if (IsFormatSupported(wanted_format, wanted_usage, format_type)) {
return wanted_format;
}
// The wanted format is not supported by hardware, search for alternatives
const vk::Format* alternatives = GetFormatAlternatives(wanted_format);
if (alternatives == nullptr) {
LOG_CRITICAL(Render_Vulkan,
"Format={} with usage={} and type={} has no defined alternatives and host "
"hardware does not support it",
static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
static_cast<u32>(format_type));
UNREACHABLE();
return wanted_format;
}
std::size_t i = 0;
for (vk::Format alternative = alternatives[0]; alternative != vk::Format{};
alternative = alternatives[++i]) {
if (!IsFormatSupported(alternative, wanted_usage, format_type))
continue;
LOG_WARNING(Render_Vulkan,
"Emulating format={} with alternative format={} with usage={} and type={}",
static_cast<u32>(wanted_format), static_cast<u32>(alternative),
static_cast<u32>(wanted_usage), static_cast<u32>(format_type));
return alternative;
}
// No alternatives found, panic
LOG_CRITICAL(Render_Vulkan,
"Format={} with usage={} and type={} is not supported by the host hardware and "
"doesn't support any of the alternatives",
static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
static_cast<u32>(format_type));
UNREACHABLE();
return wanted_format;
}
bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
FormatType format_type) const {
const auto it = format_properties.find(wanted_format);
if (it == format_properties.end()) {
LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}",
static_cast<u32>(wanted_format));
UNREACHABLE();
return true;
}
const vk::FormatFeatureFlags supported_usage = GetFormatFeatures(it->second, format_type);
return (supported_usage & wanted_usage) == wanted_usage;
}
bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface) {
const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME;
bool has_swapchain{};
for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
has_swapchain |= prop.extensionName == swapchain_extension;
}
if (!has_swapchain) {
// The device doesn't support creating swapchains.
return false;
}
bool has_graphics{}, has_present{};
const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
const auto& family = queue_family_properties[i];
if (family.queueCount == 0)
continue;
has_graphics |=
(family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0);
has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0;
}
if (!has_graphics || !has_present) {
// The device doesn't have a graphics and present queue.
return false;
}
// TODO(Rodrigo): Check if the device matches all requirements.
const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
if (props.limits.maxUniformBufferRange < 65536) {
return false;
}
// Device is suitable.
return true;
}
void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) {
std::optional<u32> graphics_family_, present_family_;
const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
if (graphics_family_ && present_family_)
break;
const auto& queue_family = queue_family_properties[i];
if (queue_family.queueCount == 0)
continue;
if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics)
graphics_family_ = i;
if (physical.getSurfaceSupportKHR(i, surface, dldi))
present_family_ = i;
}
ASSERT(graphics_family_ && present_family_);
graphics_family = *graphics_family_;
present_family = *present_family_;
}
void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) {
const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
device_type = props.deviceType;
uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment);
}
std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
static const float QUEUE_PRIORITY = 1.f;
std::set<u32> unique_queue_families = {graphics_family, present_family};
std::vector<vk::DeviceQueueCreateInfo> queue_cis;
for (u32 queue_family : unique_queue_families)
queue_cis.push_back({{}, queue_family, 1, &QUEUE_PRIORITY});
return queue_cis;
}
std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
std::map<vk::Format, vk::FormatProperties> format_properties;
const auto AddFormatQuery = [&format_properties, &dldi, physical](vk::Format format) {
format_properties.emplace(format, physical.getFormatProperties(format, dldi));
};
AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32);
AddFormatQuery(vk::Format::eR5G6B5UnormPack16);
AddFormatQuery(vk::Format::eD32Sfloat);
AddFormatQuery(vk::Format::eD16UnormS8Uint);
AddFormatQuery(vk::Format::eD24UnormS8Uint);
AddFormatQuery(vk::Format::eD32SfloatS8Uint);
return format_properties;
}
} // namespace Vulkan

View File

@@ -0,0 +1,116 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <map>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
namespace Vulkan {
/// Format usage descriptor
enum class FormatType { Linear, Optimal, Buffer };
/// Handles data specific to a physical device.
class VKDevice final {
public:
explicit VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface);
~VKDevice();
/// Initializes the device. Returns true on success.
bool Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance);
/**
* Returns a format supported by the device for the passed requirements.
* @param wanted_format The ideal format to be returned. It may not be the returned format.
* @param wanted_usage The usage that must be fulfilled even if the format is not supported.
* @param format_type Format type usage.
* @returns A format supported by the device.
*/
vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
FormatType format_type) const;
/// Returns the dispatch loader with direct function pointers of the device
const vk::DispatchLoaderDynamic& GetDispatchLoader() const {
return dld;
}
/// Returns the logical device
vk::Device GetLogical() const {
return logical.get();
}
/// Returns the physical device.
vk::PhysicalDevice GetPhysical() const {
return physical;
}
/// Returns the main graphics queue.
vk::Queue GetGraphicsQueue() const {
return graphics_queue;
}
/// Returns the main present queue.
vk::Queue GetPresentQueue() const {
return present_queue;
}
/// Returns main graphics queue family index.
u32 GetGraphicsFamily() const {
return graphics_family;
}
/// Returns main present queue family index.
u32 GetPresentFamily() const {
return present_family;
}
/// Returns true if the device is integrated with the host CPU
bool IsIntegrated() const {
return device_type == vk::PhysicalDeviceType::eIntegratedGpu;
}
/// Returns the uniform buffer alignment requirement
u64 GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
}
/// Checks if the physical device is suitable.
static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface);
private:
/// Sets up queue families.
void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface);
/// Sets up device properties.
void SetupProperties(const vk::DispatchLoaderDynamic& dldi);
/// Returns a list of queue initialization descriptors.
std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
/// Returns true if a format is supported.
bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
FormatType format_type) const;
/// Returns the device properties for Vulkan formats.
static std::map<vk::Format, vk::FormatProperties> GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
const vk::PhysicalDevice physical; ///< Physical device
vk::DispatchLoaderDynamic dld; ///< Device function pointers
UniqueDevice logical; ///< Logical device
vk::Queue graphics_queue; ///< Main graphics queue
vk::Queue present_queue; ///< Main present queue
u32 graphics_family{}; ///< Main graphics queue family index
u32 present_family{}; ///< Main present queue family index
vk::PhysicalDeviceType device_type; ///< Physical device type
u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requirement
std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary
};
} // namespace Vulkan
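A hedged usage sketch of the interface above (the instance, dispatch loader, physical device and surface are assumed to exist already; error handling is elided):

// Pick a physical device that VKDevice considers usable, then build the logical device.
if (VKDevice::IsSuitable(dldi, physical, surface)) {
    VKDevice device(dldi, physical, surface);
    if (device.Create(dldi, instance)) {
        // Ask for a depth-stencil format; per the Alternatives table in vk_device.cpp, an
        // unsupported eD24UnormS8Uint may come back as eD32SfloatS8Uint or eD16UnormS8Uint.
        const vk::Format depth_format = device.GetSupportedFormat(
            vk::Format::eD24UnormS8Uint, vk::FormatFeatureFlagBits::eDepthStencilAttachment,
            FormatType::Optimal);
        const vk::Queue graphics = device.GetGraphicsQueue();
    }
}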

View File

@@ -0,0 +1,252 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <optional>
#include <tuple>
#include <vector>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
namespace Vulkan {
// TODO(Rodrigo): Fine tune this number
constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024;
class VKMemoryAllocation final {
public:
explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type)
: device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size},
shifted_type{ShiftType(type)}, is_mappable{properties &
vk::MemoryPropertyFlagBits::eHostVisible} {
if (is_mappable) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld));
}
}
~VKMemoryAllocation() {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
if (is_mappable)
dev.unmapMemory(memory, dld);
dev.free(memory, nullptr, dld);
}
VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size),
static_cast<u64>(alignment));
if (!found) {
found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
static_cast<u64>(alignment));
if (!found) {
// Signal out of memory, it'll try to do more allocations.
return nullptr;
}
}
u8* address = is_mappable ? base_address + *found : nullptr;
auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found,
*found + commit_size);
commits.push_back(commit.get());
// The address right after the last commit is likely to be free.
free_iterator = *found + commit_size;
return commit;
}
void Free(const VKMemoryCommitImpl* commit) {
ASSERT(commit);
const auto it =
std::find_if(commits.begin(), commits.end(),
[&](const auto& stored_commit) { return stored_commit == commit; });
if (it == commits.end()) {
LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!");
UNREACHABLE();
return;
}
commits.erase(it);
}
/// Returns whether this allocation is compatible with the arguments.
bool IsCompatible(vk::MemoryPropertyFlags wanted_properties, u32 type_mask) const {
return (wanted_properties & properties) != vk::MemoryPropertyFlagBits(0) &&
(type_mask & shifted_type) != 0;
}
private:
static constexpr u32 ShiftType(u32 type) {
return 1U << type;
}
/// Tries to find a free region between "start" and "end" that satisfies the requested size and
/// alignment; returns its offset on success.
std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
u64 iterator = start;
while (iterator + size < end) {
const u64 try_left = Common::AlignUp(iterator, alignment);
const u64 try_right = try_left + size;
bool overlap = false;
for (const auto& commit : commits) {
const auto [commit_left, commit_right] = commit->interval;
if (try_left < commit_right && commit_left < try_right) {
// There's an overlap, continue the search where the overlapping commit ends.
iterator = commit_right;
overlap = true;
break;
}
}
if (!overlap) {
// A free address has been found.
return try_left;
}
}
// No free regions were found, return an empty optional.
return std::nullopt;
}
const VKDevice& device; ///< Vulkan device.
const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
const u64 alloc_size; ///< Size of this allocation.
const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted.
const bool is_mappable; ///< Whether the allocation is mappable.
/// Base address of the mapped pointer.
u8* base_address{};
/// Hints where the next free region is likely going to be.
u64 free_iterator{};
/// Stores all commits done from this allocation.
std::vector<const VKMemoryCommitImpl*> commits;
};
VKMemoryManager::VKMemoryManager(const VKDevice& device)
: device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())},
is_memory_unified{GetMemoryUnified(props)} {}
VKMemoryManager::~VKMemoryManager() = default;
VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) {
ASSERT(reqs.size < ALLOC_CHUNK_SIZE);
// When a host visible commit is requested, search for host visible and coherent, otherwise search
// for a fast device local type.
const vk::MemoryPropertyFlags wanted_properties =
host_visible
? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
: vk::MemoryPropertyFlagBits::eDeviceLocal;
const auto TryCommit = [&]() -> VKMemoryCommit {
for (auto& alloc : allocs) {
if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits))
continue;
if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) {
return commit;
}
}
return {};
};
if (auto commit = TryCommit(); commit) {
return commit;
}
// Commit has failed, allocate more memory.
if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) {
// TODO(Rodrigo): Try to use host memory.
LOG_CRITICAL(Render_Vulkan, "Ran out of memory!");
UNREACHABLE();
}
// Commit again, this time it won't fail since there's a fresh allocation above. If it does,
// there's a bug.
auto commit = TryCommit();
ASSERT(commit);
return commit;
}
VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld);
auto commit = Commit(requeriments, host_visible);
dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld);
return commit;
}
VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const auto requeriments = dev.getImageMemoryRequirements(image, dld);
auto commit = Commit(requeriments, host_visible);
dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld);
return commit;
}
bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
u64 size) {
const u32 type = [&]() {
for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
const auto flags = props.memoryTypes[type_index].propertyFlags;
if ((type_mask & (1U << type_index)) && (flags & wanted_properties) == wanted_properties) {
// The type is allowed by the mask and has all of the wanted properties.
return type_index;
}
}
LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!");
UNREACHABLE();
return 0u;
}();
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
// Try to allocate found type.
const vk::MemoryAllocateInfo memory_ai(size, type);
vk::DeviceMemory memory;
if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
res != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
return false;
}
allocs.push_back(
std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
return true;
}
/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) {
for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) {
if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
// Memory is considered unified only when all heaps are device local.
return false;
}
}
return true;
}
VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
u8* data, u64 begin, u64 end)
: allocation{allocation}, memory{memory}, data{data}, interval(std::make_pair(begin, end)) {}
VKMemoryCommitImpl::~VKMemoryCommitImpl() {
allocation->Free(this);
}
u8* VKMemoryCommitImpl::GetData() const {
ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit.");
return data;
}
} // namespace Vulkan
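The commit search in TryFindFreeSection above boils down to the standard half-open interval overlap test: two ranges [a, b) and [c, d) intersect exactly when a < d and c < b. A minimal, self-contained sketch of that predicate, with illustrative names that are not part of the file above:

#include <cstdint>

// Returns true when the half-open ranges [a_left, a_right) and [b_left, b_right) intersect.
// This mirrors the check TryFindFreeSection performs against every existing commit.
constexpr bool Overlaps(std::uint64_t a_left, std::uint64_t a_right, std::uint64_t b_left,
                        std::uint64_t b_right) {
    return a_left < b_right && b_left < a_right;
}

static_assert(Overlaps(0, 16, 8, 32));   // Partially overlapping ranges intersect.
static_assert(!Overlaps(0, 16, 16, 32)); // Touching ranges do not, so adjacent commits are valid.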


@@ -0,0 +1,87 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
namespace Vulkan {
class VKDevice;
class VKMemoryAllocation;
class VKMemoryCommitImpl;
using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
class VKMemoryManager final {
public:
explicit VKMemoryManager(const VKDevice& device);
~VKMemoryManager();
/**
* Commits memory with the specified requirements.
* @param reqs Requirements returned from a Vulkan call.
* @param host_visible Signals the allocator that it *must* use host visible and coherent
* memory. When passing false, it will try to allocate device local memory.
* @returns A memory commit.
*/
VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible);
/// Commits memory required by the buffer and binds it.
VKMemoryCommit Commit(vk::Buffer buffer, bool host_visible);
/// Commits memory required by the image and binds it.
VKMemoryCommit Commit(vk::Image image, bool host_visible);
/// Returns true if memory allocations are always done in host-visible and coherent memory.
bool IsMemoryUnified() const {
return is_memory_unified;
}
private:
/// Allocates a chunk of memory.
bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
/// Returns true if the device uses a unified memory model.
static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props);
const VKDevice& device; ///< Device handler.
const vk::PhysicalDeviceMemoryProperties props; ///< Physical device properties.
const bool is_memory_unified; ///< True if memory model is unified.
std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations.
};
class VKMemoryCommitImpl final {
friend VKMemoryAllocation;
public:
explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data,
u64 begin, u64 end);
~VKMemoryCommitImpl();
/// Returns the writeable memory map. The commit has to be mappable.
u8* GetData() const;
/// Returns the Vulkan memory handler.
vk::DeviceMemory GetMemory() const {
return memory;
}
/// Returns the start position of the commit relative to the allocation.
vk::DeviceSize GetOffset() const {
return static_cast<vk::DeviceSize>(interval.first);
}
private:
std::pair<u64, u64> interval{}; ///< Interval where the commit exists.
vk::DeviceMemory memory; ///< Vulkan device memory handler.
VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included.
};
} // namespace Vulkan
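Taken together, the interface above suggests the following call pattern for backing a buffer with memory. This is only a sketch: device and buffer are assumed to be a constructed VKDevice and an already created vk::Buffer, neither of which appears in this diff.

// Sketch only: 'device' and 'buffer' are assumed to exist elsewhere.
Vulkan::VKMemoryManager memory_manager(device);

// Commit(vk::Buffer, bool) queries the buffer's requirements, commits memory and binds it.
Vulkan::VKMemoryCommit commit = memory_manager.Commit(buffer, /*host_visible=*/true);

// Host-visible commits expose a writeable mapping that already includes the commit's offset.
u8* mapped = commit->GetData();

// Destroying the commit (it is a std::unique_ptr) returns the region to its allocation.
commit.reset();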


@@ -0,0 +1,285 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <limits>
#include <optional>
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
namespace Vulkan {
// TODO(Rodrigo): Fine tune these numbers.
constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000;
constexpr std::size_t FENCES_GROW_STEP = 0x40;
class CommandBufferPool final : public VKFencedPool {
public:
CommandBufferPool(const VKDevice& device)
: VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {}
void Allocate(std::size_t begin, std::size_t end) override {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const u32 graphics_family = device.GetGraphicsFamily();
auto pool = std::make_unique<Pool>();
// Command buffers are going to be committed, recorded, and executed every single usage cycle.
// They are also going to be reset when committed.
const auto pool_flags = vk::CommandPoolCreateFlagBits::eTransient |
vk::CommandPoolCreateFlagBits::eResetCommandBuffer;
const vk::CommandPoolCreateInfo cmdbuf_pool_ci(pool_flags, graphics_family);
pool->handle = dev.createCommandPoolUnique(cmdbuf_pool_ci, nullptr, dld);
const vk::CommandBufferAllocateInfo cmdbuf_ai(*pool->handle,
vk::CommandBufferLevel::ePrimary,
static_cast<u32>(COMMAND_BUFFER_POOL_SIZE));
pool->cmdbufs =
dev.allocateCommandBuffersUnique<std::allocator<UniqueCommandBuffer>>(cmdbuf_ai, dld);
pools.push_back(std::move(pool));
}
vk::CommandBuffer Commit(VKFence& fence) {
const std::size_t index = CommitResource(fence);
const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
return *pools[pool_index]->cmdbufs[sub_index];
}
private:
struct Pool {
UniqueCommandPool handle;
std::vector<UniqueCommandBuffer> cmdbufs;
};
const VKDevice& device;
std::vector<std::unique_ptr<Pool>> pools;
};
VKResource::VKResource() = default;
VKResource::~VKResource() = default;
VKFence::VKFence(const VKDevice& device, UniqueFence handle)
: device{device}, handle{std::move(handle)} {}
VKFence::~VKFence() = default;
void VKFence::Wait() {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld);
}
void VKFence::Release() {
is_owned = false;
}
void VKFence::Commit() {
is_owned = true;
is_used = true;
}
bool VKFence::Tick(bool gpu_wait, bool owner_wait) {
if (!is_used) {
// If a fence is not used it's always free.
return true;
}
if (is_owned && !owner_wait) {
// The fence is still owned (Release has not been called) and an ownership wait has not been
// requested.
return false;
}
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
if (gpu_wait) {
// Wait for the fence if it has been requested.
dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld);
} else {
if (dev.getFenceStatus(*handle, dld) != vk::Result::eSuccess) {
// The Vulkan fence is not ready yet; there is nothing else to do here.
return false;
}
}
// Notify the protected resources that they are now free.
for (auto* resource : protected_resources) {
resource->OnFenceRemoval(this);
}
protected_resources.clear();
// Prepare the fence for reuse.
dev.resetFences({*handle}, dld);
is_used = false;
return true;
}
void VKFence::Protect(VKResource* resource) {
protected_resources.push_back(resource);
}
void VKFence::Unprotect(const VKResource* resource) {
const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
if (it != protected_resources.end()) {
protected_resources.erase(it);
}
}
VKFenceWatch::VKFenceWatch() = default;
VKFenceWatch::~VKFenceWatch() {
if (fence) {
fence->Unprotect(this);
}
}
void VKFenceWatch::Wait() {
if (!fence) {
return;
}
fence->Wait();
fence->Unprotect(this);
fence = nullptr;
}
void VKFenceWatch::Watch(VKFence& new_fence) {
Wait();
fence = &new_fence;
fence->Protect(this);
}
bool VKFenceWatch::TryWatch(VKFence& new_fence) {
if (fence) {
return false;
}
fence = &new_fence;
fence->Protect(this);
return true;
}
void VKFenceWatch::OnFenceRemoval(VKFence* signaling_fence) {
ASSERT_MSG(signaling_fence == fence, "Removing the wrong fence");
fence = nullptr;
}
VKFencedPool::VKFencedPool(std::size_t grow_step) : grow_step{grow_step} {}
VKFencedPool::~VKFencedPool() = default;
std::size_t VKFencedPool::CommitResource(VKFence& fence) {
const auto Search = [&](std::size_t begin, std::size_t end) -> std::optional<std::size_t> {
for (std::size_t iterator = begin; iterator < end; ++iterator) {
if (watches[iterator]->TryWatch(fence)) {
// The resource is now being watched, a free resource was successfully found.
return iterator;
}
}
return {};
};
// Try to find a free resource from the hinted position to the end.
auto found = Search(free_iterator, watches.size());
if (!found) {
// Search from beginning to the hinted position.
found = Search(0, free_iterator);
if (!found) {
// Both searches failed, the pool is full; handle it.
const std::size_t free_resource = ManageOverflow();
// Watch will wait for the resource to be free.
watches[free_resource]->Watch(fence);
found = free_resource;
}
}
// The free iterator is hinted to the resource after the one that has just been committed.
free_iterator = (*found + 1) % watches.size();
return *found;
}
std::size_t VKFencedPool::ManageOverflow() {
const std::size_t old_capacity = watches.size();
Grow();
// The last entry is guaranteed to be free, since it's the first element of the freshly
// allocated resources.
return old_capacity;
}
void VKFencedPool::Grow() {
const std::size_t old_capacity = watches.size();
watches.resize(old_capacity + grow_step);
std::generate(watches.begin() + old_capacity, watches.end(),
[]() { return std::make_unique<VKFenceWatch>(); });
Allocate(old_capacity, old_capacity + grow_step);
}
VKResourceManager::VKResourceManager(const VKDevice& device) : device{device} {
GrowFences(FENCES_GROW_STEP);
command_buffer_pool = std::make_unique<CommandBufferPool>(device);
}
VKResourceManager::~VKResourceManager() = default;
VKFence& VKResourceManager::CommitFence() {
const auto StepFences = [&](bool gpu_wait, bool owner_wait) -> VKFence* {
const auto Tick = [=](auto& fence) { return fence->Tick(gpu_wait, owner_wait); };
const auto hinted = fences.begin() + fences_iterator;
auto it = std::find_if(hinted, fences.end(), Tick);
if (it == fences.end()) {
it = std::find_if(fences.begin(), hinted, Tick);
if (it == hinted) {
return nullptr;
}
}
fences_iterator = std::distance(fences.begin(), it) + 1;
if (fences_iterator >= fences.size())
fences_iterator = 0;
auto& fence = *it;
fence->Commit();
return fence.get();
};
VKFence* found_fence = StepFences(false, false);
if (!found_fence) {
// Try again, this time waiting.
found_fence = StepFences(true, false);
if (!found_fence) {
// Allocate new fences and try again.
LOG_INFO(Render_Vulkan, "Allocating new fences {} -> {}", fences.size(),
fences.size() + FENCES_GROW_STEP);
GrowFences(FENCES_GROW_STEP);
found_fence = StepFences(true, false);
ASSERT(found_fence != nullptr);
}
}
return *found_fence;
}
vk::CommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) {
return command_buffer_pool->Commit(fence);
}
void VKResourceManager::GrowFences(std::size_t new_fences_count) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const vk::FenceCreateInfo fence_ci;
const std::size_t previous_size = fences.size();
fences.resize(previous_size + new_fences_count);
std::generate(fences.begin() + previous_size, fences.end(), [&]() {
return std::make_unique<VKFence>(device, dev.createFenceUnique(fence_ci, nullptr, dld));
});
}
} // namespace Vulkan


@@ -0,0 +1,180 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <memory>
#include <vector>
#include "video_core/renderer_vulkan/declarations.h"
namespace Vulkan {
class VKDevice;
class VKFence;
class VKResourceManager;
class CommandBufferPool;
/// Interface for a Vulkan resource
class VKResource {
public:
explicit VKResource();
virtual ~VKResource();
/**
* Signals the object that an owning fence has been signaled.
* @param signaling_fence Fence that signals its usage end.
*/
virtual void OnFenceRemoval(VKFence* signaling_fence) = 0;
};
/**
* Fences take ownership of objects, protecting them from GPU-side or driver-side concurrent access.
* They must be committed from the resource manager. Their usage flow is: commit the fence from the
* resource manager, protect resources with it and use them, send the fence to an execution queue,
* Wait for it if needed, and then call Release. Used resources will automatically be signaled when
* they are free to be reused.
* @brief Protects resources from concurrent usage and signals their release.
*/
class VKFence {
friend class VKResourceManager;
public:
explicit VKFence(const VKDevice& device, UniqueFence handle);
~VKFence();
/**
* Waits for the fence to be signaled.
* @warning You must have ownership of the fence and it has to be previously sent to a queue to
* call this function.
*/
void Wait();
/**
* Releases ownership of the fence. Pass after it has been sent to an execution queue.
* Unmanaged usage of the fence after the call will result in undefined behavior because it may
* be being used for something else.
*/
void Release();
/// Protects a resource with this fence.
void Protect(VKResource* resource);
/// Removes protection for a resource.
void Unprotect(const VKResource* resource);
/// Retrieves the fence.
operator vk::Fence() const {
return *handle;
}
private:
/// Takes ownership of the fence.
void Commit();
/**
* Updates the fence status.
* @warning Waiting for the owner might soft lock the execution.
* @param gpu_wait Wait for the fence to be signaled by the driver.
* @param owner_wait Wait for the owner to signal its freedom.
* @returns True if the fence is free. Waiting for the GPU and the owner always returns true.
*/
bool Tick(bool gpu_wait, bool owner_wait);
const VKDevice& device; ///< Device handler
UniqueFence handle; ///< Vulkan fence
std::vector<VKResource*> protected_resources; ///< List of resources protected by this fence
bool is_owned = false; ///< The fence has been committed but not released yet.
bool is_used = false; ///< The fence has been committed but it has not been checked to be free.
};
/**
* A fence watch is used to keep track of the usage of a fence and protect a resource or set of
* resources without having to inherit VKResource from their handlers.
*/
class VKFenceWatch final : public VKResource {
public:
explicit VKFenceWatch();
~VKFenceWatch();
/// Waits for the fence to be released.
void Wait();
/**
* Waits for a previous fence and watches a new one.
* @param new_fence New fence to watch.
*/
void Watch(VKFence& new_fence);
/**
* Starts watching the given fence if no fence is currently being watched.
* @param new_fence Fence to watch.
* @returns True if the watch was started, false if a fence is already being watched.
*/
bool TryWatch(VKFence& new_fence);
void OnFenceRemoval(VKFence* signaling_fence) override;
private:
VKFence* fence{}; ///< Fence watching this resource. nullptr when the watch is free.
};
/**
* Handles a pool of resources protected by fences. Manages resource overflow allocating more
* resources.
*/
class VKFencedPool {
public:
explicit VKFencedPool(std::size_t grow_step);
virtual ~VKFencedPool();
protected:
/**
* Commits a free resource and protects it with a fence. It may allocate new resources.
* @param fence Fence that protects the committed resource.
* @returns Index of the committed resource.
*/
std::size_t CommitResource(VKFence& fence);
/// Called when a chunk of resources has to be allocated.
virtual void Allocate(std::size_t begin, std::size_t end) = 0;
private:
/// Manages pool overflow by allocating new resources.
std::size_t ManageOverflow();
/// Allocates a new page of resources.
void Grow();
std::size_t grow_step = 0; ///< Number of new resources created after an overflow
std::size_t free_iterator = 0; ///< Hint to where the next free resource is likely to be found
std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Set of watched resources
};
/**
* The resource manager handles all resources that can be protected with a fence, avoiding
* driver-side or GPU-side concurrent usage. Usage is documented in VKFence.
*/
class VKResourceManager final {
public:
explicit VKResourceManager(const VKDevice& device);
~VKResourceManager();
/// Commits a fence. It has to be sent to a queue and released.
VKFence& CommitFence();
/// Commits an unused command buffer and protects it with a fence.
vk::CommandBuffer CommitCommandBuffer(VKFence& fence);
private:
/// Allocates new fences.
void GrowFences(std::size_t new_fences_count);
const VKDevice& device; ///< Device handler.
std::size_t fences_iterator = 0; ///< Index where a free fence is likely to be found.
std::vector<std::unique_ptr<VKFence>> fences; ///< Pool of fences.
std::unique_ptr<CommandBufferPool> command_buffer_pool; ///< Pool of command buffers.
};
} // namespace Vulkan
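The usage flow documented on VKFence maps to roughly the call sequence below. This is a sketch only: resource_manager, queue, dld and resource (a type derived from VKResource) are assumed to exist elsewhere, submit_info is an assumed vk::SubmitInfo describing the recorded work, and the queue submission leans on the operator vk::Fence conversion declared above.

// Sketch only: resource_manager, queue, dld, resource and submit_info are assumed to exist.
Vulkan::VKFence& fence = resource_manager.CommitFence(); // 1. Commit a fence.
fence.Protect(&resource);                                // 2. Protect the resources in use.
queue.submit({submit_info}, fence, dld);                 // 3. Send the fence to an execution queue.
fence.Wait();                                            // 4. Optionally wait for the GPU.
fence.Release();                                         // 5. Release ownership; protected resources
                                                         //    are notified once the fence is ticked.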


@@ -0,0 +1,60 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Vulkan {
VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
: device{device}, resource_manager{resource_manager} {
next_fence = &resource_manager.CommitFence();
AllocateNewContext();
}
VKScheduler::~VKScheduler() = default;
VKExecutionContext VKScheduler::GetExecutionContext() const {
return VKExecutionContext(current_fence, current_cmdbuf);
}
VKExecutionContext VKScheduler::Flush(vk::Semaphore semaphore) {
SubmitExecution(semaphore);
current_fence->Release();
AllocateNewContext();
return GetExecutionContext();
}
VKExecutionContext VKScheduler::Finish(vk::Semaphore semaphore) {
SubmitExecution(semaphore);
current_fence->Wait();
current_fence->Release();
AllocateNewContext();
return GetExecutionContext();
}
void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
const auto& dld = device.GetDispatchLoader();
current_cmdbuf.end(dld);
const auto queue = device.GetGraphicsQueue();
const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1u : 0u,
&semaphore);
queue.submit({submit_info}, *current_fence, dld);
}
void VKScheduler::AllocateNewContext() {
current_fence = next_fence;
current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
next_fence = &resource_manager.CommitFence();
const auto& dld = device.GetDispatchLoader();
current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, dld);
}
} // namespace Vulkan
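As a closing illustration, a renderer would drive the scheduler above roughly as follows. This is a sketch under assumptions: device and resource_manager are constructed elsewhere, and the execution context is assumed to expose the fence and command buffer passed to its constructor through accessors declared in the header, which is not shown in this diff.

// Sketch only: 'device' and 'resource_manager' are assumed to be constructed elsewhere.
Vulkan::VKScheduler scheduler(device, resource_manager);

// Record work into the current execution context's command buffer and protect the
// resources it touches with the context's fence.
auto exctx = scheduler.GetExecutionContext();

// Flush submits the recorded work and hands back a fresh context; Finish also waits for the GPU.
// Passing a null semaphore means no semaphore is signaled on submission.
exctx = scheduler.Flush(nullptr);
exctx = scheduler.Finish(nullptr);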

Some files were not shown because too many files have changed in this diff.