Compare commits

...

54 Commits

Author SHA1 Message Date
ReinUsesLisp
66a1c777c9 gl_texture_cache: Use local variables to simplify DownloadTexture 2020-01-14 17:39:48 -03:00
ReinUsesLisp
cdb00546f0 gl_texture_cache: Fix format for RGBX16F 2020-01-14 17:38:33 -03:00
ReinUsesLisp
2d09467f6f gl_texture_cache: Use Snorm internal format for RG8S 2020-01-14 17:37:58 -03:00
ReinUsesLisp
02624c35ec gl_texture_cache: Use Snorm internal format for ABGR8S 2020-01-14 17:37:23 -03:00
Rodrigo Locatti
64cd46579b Merge pull request #3303 from lioncash/reorder
control_flow: Silence -Wreorder warning for CFGRebuildState
2020-01-14 16:15:18 -03:00
Rodrigo Locatti
81e9e229fa Merge pull request #3302 from lioncash/unused-var
gl_shader_cache: Remove unused variables
2020-01-14 16:14:47 -03:00
Lioncash
a1eee1749e control_flow: Silence -Wreorder warning for CFGRebuildState
Organizes the initializer list in the same order that the variables
would actually be initialized in.
2020-01-14 13:28:48 -05:00
bunnei
a83e28b237 Merge pull request #3296 from Simek/hotkeys_resize
GUI/configure: resize hotkeys action column to fit content
2020-01-14 13:17:16 -05:00
Lioncash
f10ea944e0 gl_shader_cache: Remove unused STAGE_RESERVED_UBOS constant
Given this isn't used, this can be removed entirely.
2020-01-14 13:16:52 -05:00
Lioncash
4cd5ad90f3 gl_shader_cache: std::move entries in CachedShader constructor
Avoids several reallocations of std::vector instances where applicable.
2020-01-14 13:14:16 -05:00
Lioncash
15a6840e7a gl_shader_cache: Remove unused entries variable in BuildShader()
Eliminates a few unnecessary constructions of std::vectors.
2020-01-14 13:11:49 -05:00
bunnei
55f95e7f26 Merge pull request #3287 from ReinUsesLisp/ldg-stg-16
shader_ir/memory: Implement u16 and u8 for STG and LDG
2020-01-14 09:57:08 -05:00
bunnei
15788ffcde Merge pull request #3288 from ReinUsesLisp/uncurse-aoffi
shader_ir/texture: Simplify AOFFI code
2020-01-13 23:52:12 -05:00
bunnei
6985eea519 Merge pull request #3290 from ReinUsesLisp/gl-clamp
maxwell_to_vk: Implement GL_CLAMP hacking Nvidia's driver
2020-01-13 19:16:06 -05:00
bunnei
e749f17257 Merge pull request #3292 from degasus/heap_space_fix
core/kernel: Fix GetTotalPhysicalMemoryUsed.
2020-01-13 19:15:43 -05:00
Bartosz Kaszubowski
6726e8b784 GUI/configure: resize hotkeys column to content 2020-01-12 22:46:28 +01:00
Fernando Sahmkow
43fc793439 Merge pull request #3283 from ReinUsesLisp/vk-compute-pass
vk_compute_pass: Add compute passes to emulate missing Vulkan features
2020-01-12 11:14:59 -04:00
Markus Wick
c76ffa5019 core/kernel: Fix GetTotalPhysicalMemoryUsed.
module._memory was already moved over to a new shared_ptr, so code_memory_size was not increased at all.

This lowers the heap space and so saves a bit of memory, usually between 50 and 100 MB.

This fixes a regression from c0a01f3adc
2020-01-11 14:04:44 +01:00
Rodrigo Locatti
b1138e5ea1 vk_compute_pass: Address feedback
Comment hardcoded SPIR-V modules.
2020-01-10 22:46:34 -03:00
ReinUsesLisp
3d46709b7f maxwell_to_vk: Implement GL_CLAMP hacking Nvidia's driver
Nvidia's driver defaults invalid enumerations to GL_CLAMP. Vulkan
doesn't expose GL_CLAMP through its API, but we can hack it on Nvidia's
driver using the internal driver defaults.
2020-01-10 17:12:50 -03:00
ReinUsesLisp
13021b534c shader_ir/texture: Simplify AOFFI code 2020-01-09 03:50:37 -03:00
ReinUsesLisp
e2a2a556b9 shader_ir/memory: Implement u16 and u8 for STG and LDG
Using the same technique we used for u8 on LDG, implement u16.

In the case of STG, load the containing memory word, insert the value we want to store into it with bitfieldInsert, then write that word back.
2020-01-09 02:12:29 -03:00
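For illustration only, a minimal C++ sketch of the read-modify-write idea described above for STG, assuming memory that is only addressable as 32-bit words and a naturally aligned 16-bit address; BitfieldInsert and StoreU16 are hypothetical names, not code from the commit:

#include <cstdint>

// Illustrative analogue of GLSL's bitfieldInsert(base, insert, offset, bits).
constexpr std::uint32_t BitfieldInsert(std::uint32_t base, std::uint32_t insert,
                                       std::uint32_t offset, std::uint32_t bits) {
    const std::uint32_t mask = ((1u << bits) - 1u) << offset;
    return (base & ~mask) | ((insert << offset) & mask);
}

// Emulate a 16-bit store: load the containing 32-bit word, insert the value,
// then write the word back.
void StoreU16(std::uint32_t* words, std::uint64_t byte_address, std::uint16_t value) {
    const std::uint64_t word_index = byte_address / 4;
    const std::uint32_t bit_offset = static_cast<std::uint32_t>(byte_address % 4) * 8;
    words[word_index] = BitfieldInsert(words[word_index], value, bit_offset, 16);
}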
ReinUsesLisp
908e085d02 vk_compute_pass: Add compute passes to emulate missing Vulkan features
This currently only supports quad arrays and u8 indices.

In the future we can replace the quad array pass with a table written from the
CPU, but it was used to bootstrap the other passes' helpers and was left in
the code.

The blob code is generated from the "shaders/" directory. Read the
instructions there to know how to generate the SPIR-V.
2020-01-08 19:24:26 -03:00
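For reference, the quad-array expansion boils down to emitting two triangles per quad; below is a CPU-side sketch of that index table (illustrative C++, not the shipped SPIR-V pass, which performs the same expansion on the GPU):

#include <cstdint>
#include <initializer_list>
#include <vector>

// For every quad (v0, v1, v2, v3) emit the triangles (v0, v1, v2) and (v0, v2, v3).
std::vector<std::uint32_t> BuildQuadIndexBuffer(std::uint32_t num_quad_vertices) {
    std::vector<std::uint32_t> indices;
    indices.reserve(num_quad_vertices / 4 * 6);
    for (std::uint32_t quad = 0; quad < num_quad_vertices / 4; ++quad) {
        for (const std::uint32_t offset : {0u, 1u, 2u, 0u, 2u, 3u}) {
            indices.push_back(quad * 4 + offset);
        }
    }
    return indices;
}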
ReinUsesLisp
82a64da077 vk_shader_util: Add helper to build SPIR-V shaders 2020-01-08 19:22:20 -03:00
Fernando Sahmkow
80436c1330 Merge pull request #3279 from ReinUsesLisp/vk-pipeline-cache
vk_pipeline_cache: Initial implementation
2020-01-08 17:31:20 -04:00
bunnei
319c4d2108 Merge pull request #3272 from bunnei/vi-close-layer
service: vi: Implement CloseLayer.
2020-01-07 12:45:34 -05:00
ReinUsesLisp
6888d776ff vk_pipeline_cache: Initial implementation
Given a pipeline key, this cache returns a pipeline abstraction (for
graphics or compute).
2020-01-06 22:02:26 -03:00
ReinUsesLisp
2effdeb924 vk_graphics_pipeline: Initial implementation
This abstraction represents the state of the 3D engine at a given draw.
Instead of changing individual bits of the pipeline, as is done in
APIs like D3D11, OpenGL and NVN, on Vulkan we are forced to put
everything together into a single, immutable object.

It takes advantage of the few dynamic states Vulkan offers.
2020-01-06 22:02:26 -03:00
ReinUsesLisp
dc96a59fa0 vk_compute_pipeline: Initial implementation
This abstraction represents a Vulkan compute pipeline.
2020-01-06 22:02:26 -03:00
ReinUsesLisp
b392a5986e vk_pipeline_cache: Add file and define descriptor update template filler
This function allows us to share code between compute and graphics
pipelines compilation.
2020-01-06 22:02:26 -03:00
ReinUsesLisp
3142f1b597 fixed_pipeline_state: Add depth clamp 2020-01-06 22:02:26 -03:00
ReinUsesLisp
9c548146ca vk_rasterizer: Add placeholder 2020-01-06 22:02:26 -03:00
bunnei
5be00cba15 Merge pull request #3276 from ReinUsesLisp/pipeline-reqs
vk_update_descriptor/vk_renderpass_cache: Add pipeline cache dependencies
2020-01-06 17:03:34 -05:00
bunnei
ee9b4a7f9a Merge pull request #3278 from ReinUsesLisp/vk-memory-manager
renderer_vulkan: Buffer cache, stream buffer and memory manager changes
2020-01-06 17:03:04 -05:00
ReinUsesLisp
5aeff9aff5 vk_renderpass_cache: Initial implementation
The renderpass cache is used to avoid creating renderpasses on each
draw. The hashed structure is not currently optimized.
2020-01-06 18:28:32 -03:00
ReinUsesLisp
322d6a0311 vk_update_descriptor: Initial implementation
The update descriptor is used to store, in flat memory, a large chunk of
staging data used to update descriptor sets through templates. It
provides a push interface to easily insert descriptors following the
current pipeline. The order used in the descriptor update template has
to be implicitly followed. We can catch bugs here using validation
layers.
2020-01-06 18:28:32 -03:00
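A rough sketch of such a push interface, using opaque stand-ins for the Vulkan handle types (all names below are illustrative); the packed payload would then be handed to vkUpdateDescriptorSetWithTemplate, whose template entries must match the push order:

#include <cstdint>
#include <vector>

// Flat staging entry consumed in the same order as the descriptor update template.
struct DescriptorEntry {
    std::uint64_t buffer{};   // stand-in for VkBuffer
    std::uint64_t image{};    // stand-in for VkImageView
    std::uint64_t sampler{};  // stand-in for VkSampler
    std::uint64_t offset{};
    std::uint64_t size{};
};

class UpdateDescriptorQueueSketch {
public:
    void AddBuffer(std::uint64_t buffer, std::uint64_t offset, std::uint64_t size) {
        payload.push_back({buffer, 0, 0, offset, size});
    }

    void AddSampledImage(std::uint64_t sampler, std::uint64_t image) {
        payload.push_back({0, image, sampler, 0, 0});
    }

    // Pointer to the flat payload the template update reads from.
    const DescriptorEntry* Data() const {
        return payload.data();
    }

private:
    std::vector<DescriptorEntry> payload;
};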
ReinUsesLisp
5b01f80a12 vk_stream_buffer/vk_buffer_cache: Avoid halting and use generic cache
Before this commit, once the stream buffer was full (no more bytes to
write before looping) it waited for all previous operations to finish.
This was a temporary solution with a noticeable performance penalty
(from what a profiler showed).

To avoid this, mark usages of the stream buffer with fences and, once it
loops, wait for them to be signaled. On average this never waits.
Each fence knows where its usage finishes, resulting in a non-paged
stream buffer.

On the other side, the buffer cache is reimplemented using the generic
buffer cache. It makes use of the staging buffer pool and the new
stream buffer.
2020-01-06 18:13:41 -03:00
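A simplified sketch of the fence-tracked ring described above; Fence, Watch and StreamBufferRing are illustrative names and the actual implementation differs in its details:

#include <cstdint>
#include <deque>
#include <utility>

// Illustrative fence: Wait() blocks until the GPU signals that the work using
// the guarded region has finished.
struct Fence {
    void Wait() const {}
};

// Each recorded usage remembers the ring region it covers and its guarding fence.
struct Watch {
    std::uint64_t lower_bound{};
    std::uint64_t upper_bound{};
    const Fence* fence{};
};

class StreamBufferRing {
public:
    explicit StreamBufferRing(std::uint64_t size) : buffer_size{size} {}

    // Returns the offset where `size` bytes may be written.
    std::uint64_t Map(std::uint64_t size) {
        if (write_offset + size > buffer_size) {
            // The ring looped: usages from the previous pass now guard the memory
            // we are about to reuse, so move them to the wait list.
            wait_list = std::move(current_usages);
            current_usages.clear();
            write_offset = 0;
        }
        // Wait only for previous usages overlapping the region being reused; on
        // average these fences are long signaled and nothing blocks here.
        while (!wait_list.empty() && wait_list.front().lower_bound < write_offset + size) {
            wait_list.front().fence->Wait();
            wait_list.pop_front();
        }
        return write_offset;
    }

    // Called after recording the GPU work that consumes the mapped region.
    void Unmap(std::uint64_t size, const Fence& fence) {
        current_usages.push_back({write_offset, write_offset + size, &fence});
        write_offset += size;
    }

private:
    std::uint64_t buffer_size;
    std::uint64_t write_offset = 0;
    std::deque<Watch> current_usages;
    std::deque<Watch> wait_list;
};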
ReinUsesLisp
ceb851b590 vk_memory_manager: Misc changes
* Allocate memory in discrete, exponentially increasing chunks up to a
128 MiB threshold. Allocations larger than that increase linearly by
256 MiB (depending on the required size). This allows using small
allocations for small resources.

* Move memory maps to a RAII abstraction. To optimize for debugging
tools (like RenderDoc), users will map/unmap on usage. If this ever
becomes a noticeable overhead (from my profiling it doesn't) we can
transparently move to persistent memory maps without harming the API,
getting optimal performance for both gameplay and debugging.

* Improve messages on exceptional situations.

* Fix typos "requeriments" -> "requirements".

* Small style changes.
2020-01-06 18:13:41 -03:00
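A minimal sketch of the chunk-sizing policy from the first bullet above; the 16 MiB starting size is an assumption for illustration, not a value taken from the commit:

#include <cstdint>

// Grow allocation sizes exponentially up to the 128 MiB threshold, then round
// up to the next 256 MiB step, so small resources get small allocations.
std::uint64_t AllocationChunkSize(std::uint64_t required_size) {
    constexpr std::uint64_t mib = 1024 * 1024;
    std::uint64_t chunk = 16 * mib; // assumed starting size
    while (chunk < required_size && chunk < 128 * mib) {
        chunk *= 2;
    }
    if (chunk >= required_size) {
        return chunk;
    }
    // Past the threshold: advance linearly in 256 MiB increments.
    return (required_size + 256 * mib - 1) / (256 * mib) * (256 * mib);
}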
ReinUsesLisp
85bb6a6f08 vk_buffer_cache: Temporarily remove buffer cache
This is intended for a follow-up commit, to avoid circular dependencies.
2020-01-06 17:58:46 -03:00
bunnei
984563b773 Merge pull request #3277 from ReinUsesLisp/make-current
yuzu/bootmanager: Remove {glx,wgl}MakeCurrent on SwapBuffers
2020-01-06 14:09:19 -05:00
ReinUsesLisp
8306703a7d yuzu/bootmanager: Remove {glx,wgl}MakeCurrent on SwapBuffers
MakeCurrent is a costly call (according to Nsight's profiler it takes a tenth
of a millisecond to complete), and we don't have a reason to call it
because:
- Qt no longer signals a warning if it's not called
- yuzu no longer supports macOS
2020-01-06 14:02:47 -03:00
bunnei
09908207fb Merge pull request #3261 from degasus/page_table
core/memory + arm/dynarmic: Use a global offset within our arm page table.
2020-01-06 11:56:59 -05:00
bunnei
89fc75d769 Merge pull request #3257 from degasus/no_busy_loops
video_core: Block in WaitFence.
2020-01-06 00:09:57 -05:00
Fernando Sahmkow
56e450a3f7 Merge pull request #3264 from ReinUsesLisp/vk-descriptor-pool
vk_descriptor_pool: Initial implementation
2020-01-05 15:54:41 -04:00
bunnei
6fe51f398f Merge pull request #2945 from FernandoS27/fix-bcat
nifm: Only return that there's an internet connection when there's a BCATServer
2020-01-05 02:17:16 -05:00
bunnei
cd0a7dfdbc Merge pull request #3258 from FernandoS27/shader-amend
Shader_IR: add the ability to amend code in the shader ir.
2020-01-04 14:05:17 -05:00
Fernando Sahmkow
3dd6b55851 Shader_IR: Address Feedback 2020-01-04 14:40:57 -04:00
bunnei
64c5631579 service: vi: Implement CloseLayer.
- Needed for Undertale.
2020-01-04 00:45:06 -05:00
Rodrigo Locatti
6e347d8d1b Update src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
Co-Authored-By: Mat M. <mathew1800@gmail.com>
2020-01-03 17:34:30 -03:00
ReinUsesLisp
1fe7df4517 vk_descriptor_pool: Initial implementation
Create a large descriptor pool from which we allocate all our descriptors.
It has to be wide enough to support any pipeline, hence its large
numbers.

If the descriptor pool is filled, we allocate more memory at that moment.
This way we can take advantage of permissive drivers like Nvidia's that
allocate more descriptors than what the spec requires.
2020-01-01 16:44:06 -03:00
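A hedged sketch of the grow-on-exhaustion behavior described above, using opaque stand-ins instead of real Vulkan calls (all names are illustrative):

#include <cstdint>
#include <vector>

// Stand-in for a VkDescriptorPool created with generous per-type counts.
struct DescriptorPoolSketch {
    // Returns false when the pool runs out of descriptors
    // (VK_ERROR_OUT_OF_POOL_MEMORY in the real API).
    bool TryAllocate(std::uint32_t /*num_sets*/) {
        return !exhausted;
    }
    bool exhausted = false;
};

class DescriptorAllocatorSketch {
public:
    // Allocate from the newest pool; if it is filled, create another large pool
    // at that moment and retry.
    void Allocate(std::uint32_t num_sets) {
        if (pools.empty() || !pools.back().TryAllocate(num_sets)) {
            pools.emplace_back();
            pools.back().TryAllocate(num_sets);
        }
    }

private:
    std::vector<DescriptorPoolSketch> pools;
};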
Markus Wick
0986caa8d8 core/memory + arm/dynarmic: Use a global offset within our arm page table.
This saves us two x64 instructions per load/store instruction.

TODO: Clean up our memory code. We can use this optimization here as well.
2020-01-01 12:24:54 +01:00
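Condensed from the core/memory diff further down: the trick is to bias each page-table entry by its page's virtual base, so the fast path adds the full virtual address without masking off the page offset. A minimal sketch (4 KiB pages assumed, matching core/memory):

#include <cstdint>

constexpr std::uint64_t PAGE_BITS = 12;
constexpr std::uint64_t PAGE_SIZE = 1ULL << PAGE_BITS;
constexpr std::uint64_t PAGE_MASK = PAGE_SIZE - 1;

// Store `host_memory - page_virtual_base` instead of `host_memory`.
void MapPage(std::uint8_t** pointers, std::uint64_t vaddr, std::uint8_t* host_memory) {
    pointers[vaddr >> PAGE_BITS] = host_memory - (vaddr & ~PAGE_MASK);
}

// The lookup becomes a shift, a load and a single add; the old `& PAGE_MASK`
// step disappears, which is where the saved x64 instructions in the JIT fast
// path come from.
std::uint8_t* GetPointer(std::uint8_t** pointers, std::uint64_t vaddr) {
    return pointers[vaddr >> PAGE_BITS] + vaddr;
}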
Fernando Sahmkow
b3371ed09e Shader_IR: add the ability to amend code in the shader ir.
This commit introduces a mechanism by which shader IR code can be
amended and extended. This is useful for tracking algorithms where
certain information can be derived before the track, such as indices
to array samplers.
2019-12-30 15:31:48 -04:00
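A small sketch of how such an amend list can be stored and referenced by index (illustrative names; the consumer side is visible in the shader decompiler diff below, which visits the amend node before the node that carries it):

#include <cstddef>
#include <optional>
#include <utility>
#include <vector>

// Illustrative stand-in; the real IR uses a variant of node classes.
struct Node {};

class ShaderIRSketch {
public:
    // Store an amend node and return an index an existing node can carry.
    std::size_t DeclareAmend(Node node) {
        amend_code.push_back(std::move(node));
        return amend_code.size() - 1;
    }

    const Node& GetAmendNode(std::size_t index) const {
        return amend_code[index];
    }

private:
    std::vector<Node> amend_code;
};

// Nodes that can be amended expose an optional index pointing into the list.
struct OperationNodeSketch {
    std::optional<std::size_t> amend_index;
};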
Markus Wick
cb9dd01ffd video_core: Block in WaitFence.
This function is called rarely and blocks quite often for a long time,
so don't waste power and let the CPU sleep.

This might also increase performance, as the other cores might be allowed to clock higher.
2019-12-30 13:04:53 +01:00
Fernando Sahmkow
3c95e49c42 nifm: Only return that there's an internet connection when there's a BCATServer
This helps games that need internet for other purposes to boot, as the
rest of our internet infrastructure is incomplete.
2019-11-06 23:10:32 -05:00
53 changed files with 2791 additions and 440 deletions

View File

@@ -141,6 +141,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& pag
config.page_table = reinterpret_cast<void**>(page_table.pointers.data());
config.page_table_address_space_bits = address_space_bits;
config.silently_mirror_page_table = false;
config.absolute_offset_page_table = true;
// Multi-process state
config.processor_id = core_index;

View File

@@ -317,6 +317,8 @@ void Process::FreeTLSRegion(VAddr tls_address) {
}
void Process::LoadModule(CodeSet module_, VAddr base_addr) {
code_memory_size += module_.memory.size();
const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory));
const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions,
@@ -332,8 +334,6 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
MapSegment(module_.CodeSegment(), VMAPermission::ReadExecute, MemoryState::Code);
MapSegment(module_.RODataSegment(), VMAPermission::Read, MemoryState::CodeData);
MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData);
code_memory_size += module_.memory.size();
}
Process::Process(Core::System& system)

View File

@@ -9,6 +9,7 @@
#include "core/hle/kernel/writable_event.h"
#include "core/hle/service/nifm/nifm.h"
#include "core/hle/service/service.h"
#include "core/settings.h"
namespace Service::NIFM {
@@ -86,7 +87,12 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.PushEnum(RequestState::Connected);
if (Settings::values.bcat_backend == "none") {
rb.PushEnum(RequestState::NotSubmitted);
} else {
rb.PushEnum(RequestState::Connected);
}
}
void GetResult(Kernel::HLERequestContext& ctx) {
@@ -194,14 +200,22 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push<u8>(1);
if (Settings::values.bcat_backend == "none") {
rb.Push<u8>(0);
} else {
rb.Push<u8>(1);
}
}
void IsAnyInternetRequestAccepted(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_NIFM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
rb.Push<u8>(1);
if (Settings::values.bcat_backend == "none") {
rb.Push<u8>(0);
} else {
rb.Push<u8>(1);
}
}
Core::System& system;
};

View File

@@ -88,6 +88,12 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
return layer_id;
}
void NVFlinger::CloseLayer(u64 layer_id) {
for (auto& display : displays) {
display.CloseLayer(layer_id);
}
}
std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const {
const auto* const layer = FindLayer(display_id, layer_id);
@@ -192,7 +198,7 @@ void NVFlinger::Compose() {
const auto& igbp_buffer = buffer->get().igbp_buffer;
const auto& gpu = system.GPU();
auto& gpu = system.GPU();
const auto& multi_fence = buffer->get().multi_fence;
for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
const auto& fence = multi_fence.fences[fence_id];

View File

@@ -54,6 +54,9 @@ public:
/// If an invalid display ID is specified, then an empty optional is returned.
std::optional<u64> CreateLayer(u64 display_id);
/// Closes a layer on all displays for the given layer ID.
void CloseLayer(u64 layer_id);
/// Finds the buffer queue ID of the specified layer in the specified display.
///
/// If an invalid display ID or layer ID is provided, then an empty optional is returned.

View File

@@ -24,11 +24,11 @@ Display::Display(u64 id, std::string name, Core::System& system) : id{id}, name{
Display::~Display() = default;
Layer& Display::GetLayer(std::size_t index) {
return layers.at(index);
return *layers.at(index);
}
const Layer& Display::GetLayer(std::size_t index) const {
return layers.at(index);
return *layers.at(index);
}
std::shared_ptr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
@@ -43,29 +43,38 @@ void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
// TODO(Subv): Support more than 1 layer.
ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
layers.emplace_back(id, buffer_queue);
layers.emplace_back(std::make_shared<Layer>(id, buffer_queue));
}
void Display::CloseLayer(u64 id) {
layers.erase(
std::remove_if(layers.begin(), layers.end(),
[id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; }),
layers.end());
}
Layer* Display::FindLayer(u64 id) {
const auto itr = std::find_if(layers.begin(), layers.end(),
[id](const VI::Layer& layer) { return layer.GetID() == id; });
const auto itr =
std::find_if(layers.begin(), layers.end(),
[id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; });
if (itr == layers.end()) {
return nullptr;
}
return &*itr;
return itr->get();
}
const Layer* Display::FindLayer(u64 id) const {
const auto itr = std::find_if(layers.begin(), layers.end(),
[id](const VI::Layer& layer) { return layer.GetID() == id; });
const auto itr =
std::find_if(layers.begin(), layers.end(),
[id](const std::shared_ptr<Layer>& layer) { return layer->GetID() == id; });
if (itr == layers.end()) {
return nullptr;
}
return &*itr;
return itr->get();
}
} // namespace Service::VI

View File

@@ -4,6 +4,7 @@
#pragma once
#include <memory>
#include <string>
#include <vector>
@@ -69,6 +70,12 @@ public:
///
void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);
/// Closes and removes a layer from this display with the given ID.
///
/// @param id The ID assigned to the layer to close.
///
void CloseLayer(u64 id);
/// Attempts to find a layer with the given ID.
///
/// @param id The layer ID.
@@ -91,7 +98,7 @@ private:
u64 id;
std::string name;
std::vector<Layer> layers;
std::vector<std::shared_ptr<Layer>> layers;
Kernel::EventPair vsync_event;
};

View File

@@ -1066,6 +1066,18 @@ private:
rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
}
void CloseLayer(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto layer_id{rp.Pop<u64>()};
LOG_DEBUG(Service_VI, "called. layer_id=0x{:016X}", layer_id);
nv_flinger->CloseLayer(layer_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
void CreateStrayLayer(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const u32 flags = rp.Pop<u32>();
@@ -1178,7 +1190,7 @@ IApplicationDisplayService::IApplicationDisplayService(
{1101, &IApplicationDisplayService::SetDisplayEnabled, "SetDisplayEnabled"},
{1102, &IApplicationDisplayService::GetDisplayResolution, "GetDisplayResolution"},
{2020, &IApplicationDisplayService::OpenLayer, "OpenLayer"},
{2021, nullptr, "CloseLayer"},
{2021, &IApplicationDisplayService::CloseLayer, "CloseLayer"},
{2030, &IApplicationDisplayService::CreateStrayLayer, "CreateStrayLayer"},
{2031, &IApplicationDisplayService::DestroyStrayLayer, "DestroyStrayLayer"},
{2101, &IApplicationDisplayService::SetLayerScalingMode, "SetLayerScalingMode"},

View File

@@ -146,7 +146,7 @@ struct Memory::Impl {
u8* GetPointer(const VAddr vaddr) {
u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
if (page_pointer != nullptr) {
return page_pointer + (vaddr & PAGE_MASK);
return page_pointer + vaddr;
}
if (current_page_table->attributes[vaddr >> PAGE_BITS] ==
@@ -229,7 +229,8 @@ struct Memory::Impl {
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
const u8* const src_ptr = page_table.pointers[page_index] + page_offset;
const u8* const src_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_buffer, src_ptr, copy_amount);
break;
}
@@ -276,7 +277,8 @@ struct Memory::Impl {
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
u8* const dest_ptr = page_table.pointers[page_index] + page_offset;
u8* const dest_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
std::memcpy(dest_ptr, src_buffer, copy_amount);
break;
}
@@ -322,7 +324,8 @@ struct Memory::Impl {
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
u8* dest_ptr = page_table.pointers[page_index] + page_offset;
u8* dest_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
std::memset(dest_ptr, 0, copy_amount);
break;
}
@@ -368,7 +371,8 @@ struct Memory::Impl {
}
case Common::PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
const u8* src_ptr = page_table.pointers[page_index] + page_offset;
const u8* src_ptr =
page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
WriteBlock(process, dest_addr, src_ptr, copy_amount);
break;
}
@@ -446,7 +450,8 @@ struct Memory::Impl {
page_type = Common::PageType::Unmapped;
} else {
page_type = Common::PageType::Memory;
current_page_table->pointers[vaddr >> PAGE_BITS] = pointer;
current_page_table->pointers[vaddr >> PAGE_BITS] =
pointer - (vaddr & ~PAGE_MASK);
}
break;
}
@@ -493,7 +498,9 @@ struct Memory::Impl {
memory);
} else {
while (base != end) {
page_table.pointers[base] = memory;
page_table.pointers[base] = memory - (base << PAGE_BITS);
ASSERT_MSG(page_table.pointers[base],
"memory mapping base yield a nullptr within the table");
base += 1;
memory += PAGE_SIZE;
@@ -518,7 +525,7 @@ struct Memory::Impl {
if (page_pointer != nullptr) {
// NOTE: Avoid adding any extra logic to this fast-path block
T value;
std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T));
std::memcpy(&value, &page_pointer[vaddr], sizeof(T));
return value;
}
@@ -559,7 +566,7 @@ struct Memory::Impl {
u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
if (page_pointer != nullptr) {
// NOTE: Avoid adding any extra logic to this fast-path block
std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T));
std::memcpy(&page_pointer[vaddr], &data, sizeof(T));
return;
}

View File

@@ -155,12 +155,25 @@ if (ENABLE_VULKAN)
renderer_vulkan/maxwell_to_vk.h
renderer_vulkan/vk_buffer_cache.cpp
renderer_vulkan/vk_buffer_cache.h
renderer_vulkan/vk_compute_pass.cpp
renderer_vulkan/vk_compute_pass.h
renderer_vulkan/vk_compute_pipeline.cpp
renderer_vulkan/vk_compute_pipeline.h
renderer_vulkan/vk_descriptor_pool.cpp
renderer_vulkan/vk_descriptor_pool.h
renderer_vulkan/vk_device.cpp
renderer_vulkan/vk_device.h
renderer_vulkan/vk_graphics_pipeline.cpp
renderer_vulkan/vk_graphics_pipeline.h
renderer_vulkan/vk_image.cpp
renderer_vulkan/vk_image.h
renderer_vulkan/vk_memory_manager.cpp
renderer_vulkan/vk_memory_manager.h
renderer_vulkan/vk_pipeline_cache.cpp
renderer_vulkan/vk_pipeline_cache.h
renderer_vulkan/vk_rasterizer.h
renderer_vulkan/vk_renderpass_cache.cpp
renderer_vulkan/vk_renderpass_cache.h
renderer_vulkan/vk_resource_manager.cpp
renderer_vulkan/vk_resource_manager.h
renderer_vulkan/vk_sampler_cache.cpp
@@ -169,12 +182,16 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_scheduler.h
renderer_vulkan/vk_shader_decompiler.cpp
renderer_vulkan/vk_shader_decompiler.h
renderer_vulkan/vk_shader_util.cpp
renderer_vulkan/vk_shader_util.h
renderer_vulkan/vk_staging_buffer_pool.cpp
renderer_vulkan/vk_staging_buffer_pool.h
renderer_vulkan/vk_stream_buffer.cpp
renderer_vulkan/vk_stream_buffer.h
renderer_vulkan/vk_swapchain.cpp
renderer_vulkan/vk_swapchain.h)
renderer_vulkan/vk_swapchain.h
renderer_vulkan/vk_update_descriptor.cpp
renderer_vulkan/vk_update_descriptor.h)
target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)

View File

@@ -66,19 +66,20 @@ const DmaPusher& GPU::DmaPusher() const {
return *dma_pusher;
}
void GPU::WaitFence(u32 syncpoint_id, u32 value) const {
void GPU::WaitFence(u32 syncpoint_id, u32 value) {
// Synced GPU, is always in sync
if (!is_async) {
return;
}
MICROPROFILE_SCOPE(GPU_wait);
while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) {
}
std::unique_lock lock{sync_mutex};
sync_cv.wait(lock, [=]() { return syncpoints[syncpoint_id].load() >= value; });
}
void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
syncpoints[syncpoint_id]++;
std::lock_guard lock{sync_mutex};
sync_cv.notify_all();
if (!syncpt_interrupts[syncpoint_id].empty()) {
u32 value = syncpoints[syncpoint_id].load();
auto it = syncpt_interrupts[syncpoint_id].begin();

View File

@@ -6,6 +6,7 @@
#include <array>
#include <atomic>
#include <condition_variable>
#include <list>
#include <memory>
#include <mutex>
@@ -181,7 +182,7 @@ public:
virtual void WaitIdle() const = 0;
/// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
void WaitFence(u32 syncpoint_id, u32 value) const;
void WaitFence(u32 syncpoint_id, u32 value);
void IncrementSyncPoint(u32 syncpoint_id);
@@ -312,6 +313,8 @@ private:
std::mutex sync_mutex;
std::condition_variable sync_cv;
const bool is_async;
};

View File

@@ -34,9 +34,6 @@ using VideoCommon::Shader::ShaderIR;
namespace {
// One UBO is always reserved for emulation values on staged shaders
constexpr u32 STAGE_RESERVED_UBOS = 1;
constexpr u32 STAGE_MAIN_OFFSET = 10;
constexpr u32 KERNEL_MAIN_OFFSET = 0;
@@ -243,7 +240,6 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp
if (!code_b.empty()) {
ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker);
}
const auto entries = GLShader::GetEntries(ir);
std::string source = fmt::format(R"(// {}
#version 430 core
@@ -314,9 +310,10 @@ std::unordered_set<GLenum> GetSupportedFormats() {
CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type,
GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b)
: RasterizerCacheObject{params.host_ptr}, system{params.system}, disk_cache{params.disk_cache},
device{params.device}, cpu_addr{params.cpu_addr}, unique_identifier{params.unique_identifier},
shader_type{shader_type}, entries{entries}, code{std::move(code)}, code_b{std::move(code_b)} {
: RasterizerCacheObject{params.host_ptr}, system{params.system},
disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr},
unique_identifier{params.unique_identifier}, shader_type{shader_type},
entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} {
if (!params.precompiled_variants) {
return;
}

View File

@@ -751,6 +751,9 @@ private:
Expression Visit(const Node& node) {
if (const auto operation = std::get_if<OperationNode>(&*node)) {
if (const auto amend_index = operation->GetAmendIndex()) {
Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
}
const auto operation_index = static_cast<std::size_t>(operation->GetCode());
if (operation_index >= operation_decompilers.size()) {
UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
@@ -872,6 +875,9 @@ private:
}
if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
if (const auto amend_index = conditional->GetAmendIndex()) {
Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
}
// It's invalid to call conditional on nested nodes, use an operation instead
code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool());
++code.scope;

View File

@@ -44,7 +44,7 @@ struct FormatTuple {
constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U
{GL_RGBA8, GL_RGBA, GL_BYTE, false}, // ABGR8S
{GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false}, // ABGR8S
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U
@@ -83,9 +83,9 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
{GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U
{GL_RG8, GL_RG, GL_BYTE, false}, // RG8S
{GL_RG8_SNORM, GL_RG, GL_BYTE, false}, // RG8S
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI
{GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, false}, // RGBX16F
{GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5
@@ -253,14 +253,12 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level);
u8* const mip_data = staging_buffer.data() + mip_offset;
const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level));
if (is_compressed) {
glGetCompressedTextureImage(texture.handle, level,
static_cast<GLsizei>(params.GetHostMipmapSize(level)),
staging_buffer.data() + mip_offset);
glGetCompressedTextureImage(texture.handle, level, size, mip_data);
} else {
glGetTextureImage(texture.handle, level, format, type,
static_cast<GLsizei>(params.GetHostMipmapSize(level)),
staging_buffer.data() + mip_offset);
glGetTextureImage(texture.handle, level, format, type, size, mip_data);
}
}
}

View File

@@ -109,6 +109,9 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs)
const auto topology = static_cast<std::size_t>(regs.draw.topology.Value());
const bool depth_bias_enabled = enabled_lut[PolygonOffsetEnableLUT[topology]];
const auto& clip = regs.view_volume_clip_control;
const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1;
Maxwell::Cull::FrontFace front_face = regs.cull.front_face;
if (regs.screen_y_control.triangle_rast_flip != 0 &&
regs.viewport_transform[0].scale_y > 0.0f) {
@@ -119,8 +122,9 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs)
}
const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled, gl_ndc,
regs.cull.cull_face, front_face);
return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled,
depth_clamp_enabled, gl_ndc, regs.cull.cull_face,
front_face);
}
} // Anonymous namespace
@@ -222,15 +226,17 @@ bool FixedPipelineState::Tessellation::operator==(const Tessellation& rhs) const
std::size_t FixedPipelineState::Rasterizer::Hash() const noexcept {
return static_cast<std::size_t>(cull_enable) ^
(static_cast<std::size_t>(depth_bias_enable) << 1) ^
(static_cast<std::size_t>(ndc_minus_one_to_one) << 2) ^
(static_cast<std::size_t>(depth_clamp_enable) << 2) ^
(static_cast<std::size_t>(ndc_minus_one_to_one) << 3) ^
(static_cast<std::size_t>(cull_face) << 24) ^
(static_cast<std::size_t>(front_face) << 48);
}
bool FixedPipelineState::Rasterizer::operator==(const Rasterizer& rhs) const noexcept {
return std::tie(cull_enable, depth_bias_enable, ndc_minus_one_to_one, cull_face, front_face) ==
std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.ndc_minus_one_to_one, rhs.cull_face,
rhs.front_face);
return std::tie(cull_enable, depth_bias_enable, depth_clamp_enable, ndc_minus_one_to_one,
cull_face, front_face) ==
std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.depth_clamp_enable,
rhs.ndc_minus_one_to_one, rhs.cull_face, rhs.front_face);
}
std::size_t FixedPipelineState::DepthStencil::Hash() const noexcept {

View File

@@ -170,15 +170,17 @@ struct FixedPipelineState {
};
struct Rasterizer {
constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool ndc_minus_one_to_one,
Maxwell::Cull::CullFace cull_face, Maxwell::Cull::FrontFace front_face)
constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable,
bool ndc_minus_one_to_one, Maxwell::Cull::CullFace cull_face,
Maxwell::Cull::FrontFace front_face)
: cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable},
ndc_minus_one_to_one{ndc_minus_one_to_one}, cull_face{cull_face}, front_face{
front_face} {}
depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one},
cull_face{cull_face}, front_face{front_face} {}
Rasterizer() = default;
bool cull_enable;
bool depth_bias_enable;
bool depth_clamp_enable;
bool ndc_minus_one_to_one;
Maxwell::Cull::CullFace cull_face;
Maxwell::Cull::FrontFace front_face;

View File

@@ -44,7 +44,7 @@ vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filt
return {};
}
vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode,
vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
Tegra::Texture::TextureFilter filter) {
switch (wrap_mode) {
case Tegra::Texture::WrapMode::Wrap:
@@ -56,7 +56,12 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode,
case Tegra::Texture::WrapMode::Border:
return vk::SamplerAddressMode::eClampToBorder;
case Tegra::Texture::WrapMode::Clamp:
// TODO(Rodrigo): Emulate GL_CLAMP properly
if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) {
// Nvidia's Vulkan driver defaults to GL_CLAMP on invalid enumerations, we can hack this
// by sending an invalid enumeration.
return static_cast<vk::SamplerAddressMode>(0xcafe);
}
// TODO(Rodrigo): Emulate GL_CLAMP properly on other vendors
switch (filter) {
case Tegra::Texture::TextureFilter::Nearest:
return vk::SamplerAddressMode::eClampToEdge;

View File

@@ -22,7 +22,7 @@ vk::Filter Filter(Tegra::Texture::TextureFilter filter);
vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode,
vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
Tegra::Texture::TextureFilter filter);
vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);

View File

@@ -2,124 +2,145 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <cstring>
#include <memory>
#include <optional>
#include <tuple>
#include "common/alignment.h"
#include "common/assert.h"
#include "core/memory.h"
#include "video_core/memory_manager.h"
#include "common/bit_util.h"
#include "core/core.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
namespace Vulkan {
CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
std::size_t alignment, u8* host_ptr)
: RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
alignment{alignment} {}
namespace {
VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
Memory::Memory& cpu_memory_,
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
: RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager}, cpu_memory{
cpu_memory_} {
const auto usage = vk::BufferUsageFlagBits::eVertexBuffer |
vk::BufferUsageFlagBits::eIndexBuffer |
vk::BufferUsageFlagBits::eUniformBuffer;
const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead |
vk::AccessFlagBits::eUniformRead;
stream_buffer =
std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access,
vk::PipelineStageFlagBits::eAllCommands);
buffer_handle = stream_buffer->GetBuffer();
const auto BufferUsage =
vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer |
vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer;
const auto UploadPipelineStage =
vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput |
vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
vk::PipelineStageFlagBits::eComputeShader;
const auto UploadAccessBarriers =
vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead |
vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead |
vk::AccessFlagBits::eIndexRead;
auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
return std::make_unique<VKStreamBuffer>(device, scheduler, BufferUsage);
}
} // Anonymous namespace
CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
CacheAddr cache_addr, std::size_t size)
: VideoCommon::BufferBlock{cache_addr, size} {
const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size),
BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |
vk::BufferUsageFlagBits::eTransferDst,
vk::SharingMode::eExclusive, 0, nullptr);
const auto& dld{device.GetDispatchLoader()};
const auto dev{device.GetLogical()};
buffer.handle = dev.createBufferUnique(buffer_ci, nullptr, dld);
buffer.commit = memory_manager.Commit(*buffer.handle, false);
}
CachedBufferBlock::~CachedBufferBlock() = default;
VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
: VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer>{rasterizer, system,
CreateStreamBuffer(device,
scheduler)},
device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
staging_pool} {}
VKBufferCache::~VKBufferCache() = default;
u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) {
const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
ASSERT_MSG(cpu_addr, "Invalid GPU address");
// Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games.
cache &= size >= 2048;
u8* const host_ptr{cpu_memory.GetPointer(*cpu_addr)};
if (cache) {
const auto entry = TryGet(host_ptr);
if (entry) {
if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
return entry->GetOffset();
}
Unregister(entry);
}
}
AlignBuffer(alignment);
const u64 uploaded_offset = buffer_offset;
if (host_ptr == nullptr) {
return uploaded_offset;
}
std::memcpy(buffer_ptr, host_ptr, size);
buffer_ptr += size;
buffer_offset += size;
if (cache) {
auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
alignment, host_ptr);
Register(entry);
}
return uploaded_offset;
Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size);
}
u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) {
AlignBuffer(alignment);
std::memcpy(buffer_ptr, raw_pointer, size);
const u64 uploaded_offset = buffer_offset;
buffer_ptr += size;
buffer_offset += size;
return uploaded_offset;
const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {
return buffer->GetHandle();
}
std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) {
AlignBuffer(alignment);
u8* const uploaded_ptr = buffer_ptr;
const u64 uploaded_offset = buffer_offset;
buffer_ptr += size;
buffer_offset += size;
return {uploaded_ptr, uploaded_offset};
const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) {
size = std::max(size, std::size_t(4));
const auto& empty = staging_pool.GetUnusedBuffer(size, false);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) {
cmdbuf.fillBuffer(buffer, 0, size, 0, dld);
});
return &*empty.handle;
}
void VKBufferCache::Reserve(std::size_t max_size) {
bool invalidate;
std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size);
buffer_offset = buffer_offset_base;
void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
const u8* data) {
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
std::memcpy(staging.commit->Map(size), data, size);
if (invalidate) {
InvalidateAll();
}
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
size](auto cmdbuf, auto& dld) {
cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld);
cmdbuf.pipelineBarrier(
vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
{vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer,
offset, size)},
{}, dld);
});
}
void VKBufferCache::Send() {
stream_buffer->Send(buffer_offset - buffer_offset_base);
void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
u8* data) {
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
size](auto cmdbuf, auto& dld) {
cmdbuf.pipelineBarrier(
vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eTransfer, {}, {},
{vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite,
vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)},
{}, dld);
cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld);
});
scheduler.Finish();
std::memcpy(data, staging.commit->Map(size), size);
}
void VKBufferCache::AlignBuffer(std::size_t alignment) {
// Align the offset, not the mapped pointer
const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment);
buffer_ptr += offset_aligned - buffer_offset;
buffer_offset = offset_aligned;
void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) {
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset,
dst_offset, size](auto cmdbuf, auto& dld) {
cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld);
cmdbuf.pipelineBarrier(
vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
{vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead,
vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size),
vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer,
dst_offset, size)},
{}, dld);
});
}
} // namespace Vulkan

View File

@@ -5,105 +5,74 @@
#pragma once
#include <memory>
#include <tuple>
#include <unordered_map>
#include <vector>
#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
namespace Memory {
class Memory;
}
namespace Tegra {
class MemoryManager;
namespace Core {
class System;
}
namespace Vulkan {
class VKDevice;
class VKFence;
class VKMemoryManager;
class VKStreamBuffer;
class VKScheduler;
class CachedBufferEntry final : public RasterizerCacheObject {
class CachedBufferBlock final : public VideoCommon::BufferBlock {
public:
explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment,
u8* host_ptr);
explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
CacheAddr cache_addr, std::size_t size);
~CachedBufferBlock();
VAddr GetCpuAddr() const override {
return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
return size;
}
std::size_t GetSize() const {
return size;
}
u64 GetOffset() const {
return offset;
}
std::size_t GetAlignment() const {
return alignment;
const vk::Buffer* GetHandle() const {
return &*buffer.handle;
}
private:
VAddr cpu_addr{};
std::size_t size{};
u64 offset{};
std::size_t alignment{};
VKBuffer buffer;
};
class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
using Buffer = std::shared_ptr<CachedBufferBlock>;
class VKBufferCache final : public VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer> {
public:
explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, Memory::Memory& cpu_memory_,
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size);
explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
~VKBufferCache();
/// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
/// allocated.
u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true);
/// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);
/// Reserves memory to be used by host's CPU. Returns mapped address and offset.
std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);
/// Reserves a region of memory to be used in subsequent upload/reserve operations.
void Reserve(std::size_t max_size);
/// Ensures that the set data is sent to the device.
void Send();
/// Returns the buffer cache handle.
vk::Buffer GetBuffer() const {
return buffer_handle;
}
const vk::Buffer* GetEmptyBuffer(std::size_t size) override;
protected:
// We do not have to flush this cache as things in it are never modified by us.
void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
void WriteBarrier() override {}
Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
const vk::Buffer* ToHandle(const Buffer& buffer) override;
void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
const u8* data) override;
void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
u8* data) override;
void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) override;
private:
void AlignBuffer(std::size_t alignment);
Tegra::MemoryManager& tegra_memory_manager;
Memory::Memory& cpu_memory;
std::unique_ptr<VKStreamBuffer> stream_buffer;
vk::Buffer buffer_handle;
u8* buffer_ptr = nullptr;
u64 buffer_offset = 0;
u64 buffer_offset_base = 0;
const VKDevice& device;
VKMemoryManager& memory_manager;
VKScheduler& scheduler;
VKStagingBufferPool& staging_pool;
};
} // namespace Vulkan

View File

@@ -0,0 +1,339 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include <memory>
#include <optional>
#include <utility>
#include <vector>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
namespace Vulkan {
namespace {
// Quad array SPIR-V module. Generated from the "shaders/" directory, read the instructions there.
constexpr u8 quad_array[] = {
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x54, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x48, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x48, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x1e, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
0x15, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
0x18, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00,
0x1b, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
0x3b, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x20, 0x00, 0x04, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x1c, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x2c, 0x00, 0x09, 0x00, 0x34, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00,
0x37, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
0x34, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x44, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
0x00, 0x04, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
0x49, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00,
0x3b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x4b, 0x00, 0x00, 0x00,
0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00,
0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00,
0x06, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
0x19, 0x00, 0x00, 0x00, 0xae, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf5, 0x00, 0x07, 0x00,
0x06, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
0x48, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00,
0x27, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00,
0x23, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00,
0x27, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
0x22, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
0x2b, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x2f, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
0x32, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
0x06, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
0x3e, 0x00, 0x03, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x07, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00,
0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00,
0x3d, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x44, 0x00, 0x00, 0x00,
0x45, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
0x3e, 0x00, 0x03, 0x00, 0x45, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
0x06, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00,
0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
// Uint8 SPIR-V module. Generated from the "shaders/" directory.
constexpr u8 uint8_pass[] = {
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
0x51, 0x11, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x61, 0x11, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00,
0x53, 0x50, 0x56, 0x5f, 0x4b, 0x48, 0x52, 0x5f, 0x31, 0x36, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74,
0x6f, 0x72, 0x61, 0x67, 0x65, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, 0x53, 0x50, 0x56, 0x5f,
0x4b, 0x48, 0x52, 0x5f, 0x38, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65,
0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c,
0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
0x13, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00,
0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00,
0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1f, 0x00, 0x00, 0x00,
0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2e, 0x00, 0x00, 0x00,
0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00,
0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
0x11, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x13, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
0x1e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
0x1f, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00,
0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x20, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x1e, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,
0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
0x2c, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00,
0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00,
0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00,
0x1a, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
0xf7, 0x00, 0x03, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00,
0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
0x1c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00,
0x15, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x71, 0x00, 0x04, 0x00,
0x1e, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00,
0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
} // Anonymous namespace
VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
const std::vector<vk::DescriptorUpdateTemplateEntry>& templates,
const std::vector<vk::PushConstantRange> push_constants,
std::size_t code_size, const u8* code) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const vk::DescriptorSetLayoutCreateInfo descriptor_layout_ci(
{}, static_cast<u32>(bindings.size()), bindings.data());
descriptor_set_layout = dev.createDescriptorSetLayoutUnique(descriptor_layout_ci, nullptr, dld);
const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout,
static_cast<u32>(push_constants.size()),
push_constants.data());
layout = dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld);
if (!templates.empty()) {
const vk::DescriptorUpdateTemplateCreateInfo template_ci(
{}, static_cast<u32>(templates.size()), templates.data(),
vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
vk::PipelineBindPoint::eGraphics, *layout, 0);
descriptor_template = dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
}
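    // The raw byte array is not guaranteed to satisfy u32 alignment, so copy the SPIR-V code
    // into an owned u32 buffer before handing it to Vulkan.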
auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1);
std::memcpy(code_copy.get(), code, code_size);
const vk::ShaderModuleCreateInfo module_ci({}, code_size, code_copy.get());
module = dev.createShaderModuleUnique(module_ci, nullptr, dld);
const vk::PipelineShaderStageCreateInfo stage_ci({}, vk::ShaderStageFlagBits::eCompute, *module,
"main", nullptr);
const vk::ComputePipelineCreateInfo pipeline_ci({}, stage_ci, *layout, nullptr, 0);
pipeline = dev.createComputePipelineUnique(nullptr, pipeline_ci, nullptr, dld);
}
VKComputePass::~VKComputePass() = default;
vk::DescriptorSet VKComputePass::CommitDescriptorSet(
VKUpdateDescriptorQueue& update_descriptor_queue, VKFence& fence) {
if (!descriptor_template) {
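        // Passes that bind no descriptors never created an update template; there is nothing
        // to commit.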
return {};
}
const auto set = descriptor_allocator->Commit(fence);
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKStagingBufferPool& staging_buffer_pool,
VKUpdateDescriptorQueue& update_descriptor_queue)
: VKComputePass(device, descriptor_pool,
{vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eStorageBuffer, 1,
vk::ShaderStageFlagBits::eCompute, nullptr)},
{vk::DescriptorUpdateTemplateEntry(0, 0, 1, vk::DescriptorType::eStorageBuffer,
0, sizeof(DescriptorUpdateEntry))},
{vk::PushConstantRange(vk::ShaderStageFlagBits::eCompute, 0, sizeof(u32))},
std::size(quad_array), quad_array),
scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
update_descriptor_queue{update_descriptor_queue} {}
QuadArrayPass::~QuadArrayPass() = default;
std::pair<const vk::Buffer&, vk::DeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
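    // Each quad (4 vertices) is expanded into two triangles (6 indices), hence the 6 / 4 ratio.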
const u32 num_triangle_vertices = num_vertices * 6 / 4;
const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(&*buffer.handle, 0, staging_size);
const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
scheduler.RequestOutsideRenderPassOperationContext();
ASSERT(num_vertices % 4 == 0);
const u32 num_quads = num_vertices / 4;
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, num_quads,
first, set](auto cmdbuf, auto& dld) {
constexpr u32 dispatch_size = 1024;
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline, dld);
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, 0, {set}, {}, dld);
cmdbuf.pushConstants(layout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(first), &first,
dld);
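        // One invocation handles one quad; round the quad count up to whole workgroups.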
cmdbuf.dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1, dld);
const vk::BufferMemoryBarrier barrier(
vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eVertexAttributeRead,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, 0,
static_cast<vk::DeviceSize>(num_quads) * 6 * sizeof(u32));
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eVertexInput, {}, {}, {barrier}, {}, dld);
});
return {*buffer.handle, 0};
}
Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool,
VKUpdateDescriptorQueue& update_descriptor_queue)
: VKComputePass(device, descriptor_pool,
{vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eStorageBuffer, 1,
vk::ShaderStageFlagBits::eCompute, nullptr),
vk::DescriptorSetLayoutBinding(1, vk::DescriptorType::eStorageBuffer, 1,
vk::ShaderStageFlagBits::eCompute, nullptr)},
{vk::DescriptorUpdateTemplateEntry(0, 0, 2, vk::DescriptorType::eStorageBuffer,
0, sizeof(DescriptorUpdateEntry))},
{}, std::size(uint8_pass), uint8_pass),
scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
update_descriptor_queue{update_descriptor_queue} {}
Uint8Pass::~Uint8Pass() = default;
std::pair<const vk::Buffer*, u64> Uint8Pass::Assemble(u32 num_vertices, vk::Buffer src_buffer,
u64 src_offset) {
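    // Every 8-bit index is widened to 16 bits, so the converted buffer needs two bytes per vertex.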
const auto staging_size = static_cast<u32>(num_vertices * sizeof(u16));
auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(&src_buffer, src_offset, num_vertices);
update_descriptor_queue.AddBuffer(&*buffer.handle, 0, staging_size);
const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set,
num_vertices](auto cmdbuf, auto& dld) {
constexpr u32 dispatch_size = 1024;
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline, dld);
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, 0, {set}, {}, dld);
cmdbuf.dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1, dld);
const vk::BufferMemoryBarrier barrier(
vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eVertexAttributeRead,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, 0,
static_cast<vk::DeviceSize>(num_vertices) * sizeof(u16));
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eVertexInput, {}, {}, {barrier}, {}, dld);
});
return {&*buffer.handle, 0};
}
} // namespace Vulkan


@@ -0,0 +1,77 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <optional>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
namespace Vulkan {
class VKDevice;
class VKFence;
class VKScheduler;
class VKStagingBufferPool;
class VKUpdateDescriptorQueue;
class VKComputePass {
public:
explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
const std::vector<vk::DescriptorUpdateTemplateEntry>& templates,
const std::vector<vk::PushConstantRange> push_constants,
std::size_t code_size, const u8* code);
~VKComputePass();
protected:
vk::DescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue,
VKFence& fence);
UniqueDescriptorUpdateTemplate descriptor_template;
UniquePipelineLayout layout;
UniquePipeline pipeline;
private:
UniqueDescriptorSetLayout descriptor_set_layout;
std::optional<DescriptorAllocator> descriptor_allocator;
UniqueShaderModule module;
};
class QuadArrayPass final : public VKComputePass {
public:
explicit QuadArrayPass(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKStagingBufferPool& staging_buffer_pool,
VKUpdateDescriptorQueue& update_descriptor_queue);
~QuadArrayPass();
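    /// Fills a buffer with a triangle-list index pattern covering num_vertices quad vertices and
    /// returns it together with the offset where the indices start.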
std::pair<const vk::Buffer&, vk::DeviceSize> Assemble(u32 num_vertices, u32 first);
private:
VKScheduler& scheduler;
VKStagingBufferPool& staging_buffer_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
};
class Uint8Pass final : public VKComputePass {
public:
explicit Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool,
VKUpdateDescriptorQueue& update_descriptor_queue);
~Uint8Pass();
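    /// Converts the 8-bit index buffer at src_buffer + src_offset into a 16-bit index buffer and
    /// returns it together with the offset where the converted indices start.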
std::pair<const vk::Buffer*, u64> Assemble(u32 num_vertices, vk::Buffer src_buffer,
u64 src_offset);
private:
VKScheduler& scheduler;
VKStagingBufferPool& staging_buffer_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
};
} // namespace Vulkan


@@ -0,0 +1,112 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <vector>
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
namespace Vulkan {
VKComputePipeline::VKComputePipeline(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
const SPIRVShader& shader)
: device{device}, scheduler{scheduler}, entries{shader.entries},
descriptor_set_layout{CreateDescriptorSetLayout()},
descriptor_allocator{descriptor_pool, *descriptor_set_layout},
update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
descriptor_template{CreateDescriptorUpdateTemplate()},
shader_module{CreateShaderModule(shader.code)}, pipeline{CreatePipeline()} {}
VKComputePipeline::~VKComputePipeline() = default;
vk::DescriptorSet VKComputePipeline::CommitDescriptorSet() {
if (!descriptor_template) {
return {};
}
const auto set = descriptor_allocator.Commit(scheduler.GetFence());
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
UniqueDescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
std::vector<vk::DescriptorSetLayoutBinding> bindings;
u32 binding = 0;
const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) {
// TODO(Rodrigo): Maybe make individual bindings here?
for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) {
bindings.emplace_back(binding++, descriptor_type, 1, vk::ShaderStageFlagBits::eCompute,
nullptr);
}
};
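    // Bindings are added in the same entry order used to build the descriptor update template,
    // keeping binding numbers consistent between both.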
AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
AddBindings(vk::DescriptorType::eStorageImage, entries.images.size());
const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci(
{}, static_cast<u32>(bindings.size()), bindings.data());
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld);
}
UniquePipelineLayout VKComputePipeline::CreatePipelineLayout() const {
const vk::PipelineLayoutCreateInfo layout_ci({}, 1, &*descriptor_set_layout, 0, nullptr);
const auto dev = device.GetLogical();
return dev.createPipelineLayoutUnique(layout_ci, nullptr, device.GetDispatchLoader());
}
UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate() const {
std::vector<vk::DescriptorUpdateTemplateEntry> template_entries;
u32 binding = 0;
u32 offset = 0;
FillDescriptorUpdateTemplateEntries(device, entries, binding, offset, template_entries);
if (template_entries.empty()) {
// If the shader doesn't use descriptor sets, skip template creation.
return UniqueDescriptorUpdateTemplate{};
}
const vk::DescriptorUpdateTemplateCreateInfo template_ci(
{}, static_cast<u32>(template_entries.size()), template_entries.data(),
vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
}
UniqueShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
const vk::ShaderModuleCreateInfo module_ci({}, code.size() * sizeof(u32), code.data());
const auto dev = device.GetLogical();
return dev.createShaderModuleUnique(module_ci, nullptr, device.GetDispatchLoader());
}
UniquePipeline VKComputePipeline::CreatePipeline() const {
vk::PipelineShaderStageCreateInfo shader_stage_ci({}, vk::ShaderStageFlagBits::eCompute,
*shader_module, "main", nullptr);
vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;
if (entries.uses_warps && device.IsGuestWarpSizeSupported(vk::ShaderStageFlagBits::eCompute)) {
shader_stage_ci.pNext = &subgroup_size_ci;
}
const vk::ComputePipelineCreateInfo create_info({}, shader_stage_ci, *layout, {}, 0);
const auto dev = device.GetLogical();
return dev.createComputePipelineUnique({}, create_info, nullptr, device.GetDispatchLoader());
}
} // namespace Vulkan


@@ -0,0 +1,66 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
namespace Vulkan {
class VKDevice;
class VKScheduler;
class VKUpdateDescriptorQueue;
class VKComputePipeline final {
public:
explicit VKComputePipeline(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
const SPIRVShader& shader);
~VKComputePipeline();
vk::DescriptorSet CommitDescriptorSet();
vk::Pipeline GetHandle() const {
return *pipeline;
}
vk::PipelineLayout GetLayout() const {
return *layout;
}
const ShaderEntries& GetEntries() {
return entries;
}
private:
UniqueDescriptorSetLayout CreateDescriptorSetLayout() const;
UniquePipelineLayout CreatePipelineLayout() const;
UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate() const;
UniqueShaderModule CreateShaderModule(const std::vector<u32>& code) const;
UniquePipeline CreatePipeline() const;
const VKDevice& device;
VKScheduler& scheduler;
ShaderEntries entries;
UniqueDescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
VKUpdateDescriptorQueue& update_descriptor_queue;
UniquePipelineLayout layout;
UniqueDescriptorUpdateTemplate descriptor_template;
UniqueShaderModule shader_module;
UniquePipeline pipeline;
};
} // namespace Vulkan


@@ -0,0 +1,89 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
namespace Vulkan {
// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines.
constexpr std::size_t SETS_GROW_RATE = 0x20;
DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool,
vk::DescriptorSetLayout layout)
: VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {}
DescriptorAllocator::~DescriptorAllocator() = default;
vk::DescriptorSet DescriptorAllocator::Commit(VKFence& fence) {
return *descriptors[CommitResource(fence)];
}
void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
auto new_sets = descriptor_pool.AllocateDescriptors(layout, end - begin);
descriptors.insert(descriptors.end(), std::make_move_iterator(new_sets.begin()),
std::make_move_iterator(new_sets.end()));
}
VKDescriptorPool::VKDescriptorPool(const VKDevice& device)
: device{device}, active_pool{AllocateNewPool()} {}
VKDescriptorPool::~VKDescriptorPool() = default;
vk::DescriptorPool VKDescriptorPool::AllocateNewPool() {
static constexpr u32 num_sets = 0x20000;
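    // Per-type descriptor counts; they are sized generously so a single pool can serve many
    // pipelines before AllocateDescriptors has to fall back to a new pool.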
static constexpr vk::DescriptorPoolSize pool_sizes[] = {
{vk::DescriptorType::eUniformBuffer, num_sets * 90},
{vk::DescriptorType::eStorageBuffer, num_sets * 60},
{vk::DescriptorType::eUniformTexelBuffer, num_sets * 64},
{vk::DescriptorType::eCombinedImageSampler, num_sets * 64},
{vk::DescriptorType::eStorageImage, num_sets * 40}};
const vk::DescriptorPoolCreateInfo create_info(
vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet, num_sets,
static_cast<u32>(std::size(pool_sizes)), std::data(pool_sizes));
const auto dev = device.GetLogical();
return *pools.emplace_back(
dev.createDescriptorPoolUnique(create_info, nullptr, device.GetDispatchLoader()));
}
std::vector<UniqueDescriptorSet> VKDescriptorPool::AllocateDescriptors(
vk::DescriptorSetLayout layout, std::size_t count) {
std::vector layout_copies(count, layout);
vk::DescriptorSetAllocateInfo allocate_info(active_pool, static_cast<u32>(count),
layout_copies.data());
std::vector<vk::DescriptorSet> sets(count);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
switch (const auto result = dev.allocateDescriptorSets(&allocate_info, sets.data(), dld)) {
case vk::Result::eSuccess:
break;
case vk::Result::eErrorOutOfPoolMemory:
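        // The active pool is exhausted; allocate a fresh one and retry once before giving up.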
active_pool = AllocateNewPool();
allocate_info.descriptorPool = active_pool;
if (dev.allocateDescriptorSets(&allocate_info, sets.data(), dld) == vk::Result::eSuccess) {
break;
}
[[fallthrough]];
default:
vk::throwResultException(result, "vk::Device::allocateDescriptorSetsUnique");
}
vk::PoolFree deleter(dev, active_pool, dld);
std::vector<UniqueDescriptorSet> unique_sets;
unique_sets.reserve(count);
for (const auto set : sets) {
unique_sets.push_back(UniqueDescriptorSet{set, deleter});
}
return unique_sets;
}
} // namespace Vulkan


@@ -0,0 +1,56 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
namespace Vulkan {
class VKDescriptorPool;
class DescriptorAllocator final : public VKFencedPool {
public:
explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, vk::DescriptorSetLayout layout);
~DescriptorAllocator() override;
DescriptorAllocator(const DescriptorAllocator&) = delete;
vk::DescriptorSet Commit(VKFence& fence);
protected:
void Allocate(std::size_t begin, std::size_t end) override;
private:
VKDescriptorPool& descriptor_pool;
const vk::DescriptorSetLayout layout;
std::vector<UniqueDescriptorSet> descriptors;
};
class VKDescriptorPool final {
friend DescriptorAllocator;
public:
explicit VKDescriptorPool(const VKDevice& device);
~VKDescriptorPool();
private:
vk::DescriptorPool AllocateNewPool();
std::vector<UniqueDescriptorSet> AllocateDescriptors(vk::DescriptorSetLayout layout,
std::size_t count);
const VKDevice& device;
std::vector<UniqueDescriptorPool> pools;
vk::DescriptorPool active_pool;
};
} // namespace Vulkan


@@ -0,0 +1,271 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <vector>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/microprofile.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
namespace Vulkan {
MICROPROFILE_DECLARE(Vulkan_PipelineCache);
namespace {
vk::StencilOpState GetStencilFaceState(const FixedPipelineState::StencilFace& face) {
return vk::StencilOpState(MaxwellToVK::StencilOp(face.action_stencil_fail),
MaxwellToVK::StencilOp(face.action_depth_pass),
MaxwellToVK::StencilOp(face.action_depth_fail),
MaxwellToVK::ComparisonOp(face.test_func), 0, 0, 0);
}
bool SupportsPrimitiveRestart(vk::PrimitiveTopology topology) {
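    // Vulkan only allows primitive restart on strip and fan topologies; the list and patch
    // topologies below have to disable it.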
static constexpr std::array unsupported_topologies = {
vk::PrimitiveTopology::ePointList,
vk::PrimitiveTopology::eLineList,
vk::PrimitiveTopology::eTriangleList,
vk::PrimitiveTopology::eLineListWithAdjacency,
vk::PrimitiveTopology::eTriangleListWithAdjacency,
vk::PrimitiveTopology::ePatchList};
return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies),
topology) == std::end(unsupported_topologies);
}
} // Anonymous namespace
VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
VKRenderPassCache& renderpass_cache,
const GraphicsPipelineCacheKey& key,
const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
const SPIRVProgram& program)
: device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()},
descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
descriptor_allocator{descriptor_pool, *descriptor_set_layout},
update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
program)},
renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline(
key.renderpass_params,
program)} {}
VKGraphicsPipeline::~VKGraphicsPipeline() = default;
vk::DescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
if (!descriptor_template) {
return {};
}
const auto set = descriptor_allocator.Commit(scheduler.GetFence());
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
UniqueDescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout(
const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const {
const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci(
{}, static_cast<u32>(bindings.size()), bindings.data());
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld);
}
UniquePipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const {
const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout, 0,
nullptr);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld);
}
UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplate(
const SPIRVProgram& program) const {
std::vector<vk::DescriptorUpdateTemplateEntry> template_entries;
u32 binding = 0;
u32 offset = 0;
for (const auto& stage : program) {
if (stage) {
FillDescriptorUpdateTemplateEntries(device, stage->entries, binding, offset,
template_entries);
}
}
if (template_entries.empty()) {
// If the shader doesn't use descriptor sets, skip template creation.
return UniqueDescriptorUpdateTemplate{};
}
const vk::DescriptorUpdateTemplateCreateInfo template_ci(
{}, static_cast<u32>(template_entries.size()), template_entries.data(),
vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
}
std::vector<UniqueShaderModule> VKGraphicsPipeline::CreateShaderModules(
const SPIRVProgram& program) const {
std::vector<UniqueShaderModule> modules;
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) {
const auto& stage = program[i];
if (!stage) {
continue;
}
const vk::ShaderModuleCreateInfo module_ci({}, stage->code.size() * sizeof(u32),
stage->code.data());
modules.emplace_back(dev.createShaderModuleUnique(module_ci, nullptr, dld));
}
return modules;
}
UniquePipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
const SPIRVProgram& program) const {
const auto& vi = fixed_state.vertex_input;
const auto& ia = fixed_state.input_assembly;
const auto& ds = fixed_state.depth_stencil;
const auto& cd = fixed_state.color_blending;
const auto& ts = fixed_state.tessellation;
const auto& rs = fixed_state.rasterizer;
std::vector<vk::VertexInputBindingDescription> vertex_bindings;
std::vector<vk::VertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
for (std::size_t i = 0; i < vi.num_bindings; ++i) {
const auto& binding = vi.bindings[i];
const bool instanced = binding.divisor != 0;
const auto rate = instanced ? vk::VertexInputRate::eInstance : vk::VertexInputRate::eVertex;
vertex_bindings.emplace_back(binding.index, binding.stride, rate);
if (instanced) {
vertex_binding_divisors.emplace_back(binding.index, binding.divisor);
}
}
std::vector<vk::VertexInputAttributeDescription> vertex_attributes;
const auto& input_attributes = program[0]->entries.attributes;
for (std::size_t i = 0; i < vi.num_attributes; ++i) {
const auto& attribute = vi.attributes[i];
if (input_attributes.find(attribute.index) == input_attributes.end()) {
// Skip attributes not used by the vertex shaders.
continue;
}
vertex_attributes.emplace_back(attribute.index, attribute.buffer,
MaxwellToVK::VertexFormat(attribute.type, attribute.size),
attribute.offset);
}
vk::PipelineVertexInputStateCreateInfo vertex_input_ci(
{}, static_cast<u32>(vertex_bindings.size()), vertex_bindings.data(),
static_cast<u32>(vertex_attributes.size()), vertex_attributes.data());
const vk::PipelineVertexInputDivisorStateCreateInfoEXT vertex_input_divisor_ci(
static_cast<u32>(vertex_binding_divisors.size()), vertex_binding_divisors.data());
if (!vertex_binding_divisors.empty()) {
vertex_input_ci.pNext = &vertex_input_divisor_ci;
}
const auto primitive_topology = MaxwellToVK::PrimitiveTopology(device, ia.topology);
const vk::PipelineInputAssemblyStateCreateInfo input_assembly_ci(
{}, primitive_topology,
ia.primitive_restart_enable && SupportsPrimitiveRestart(primitive_topology));
const vk::PipelineTessellationStateCreateInfo tessellation_ci({}, ts.patch_control_points);
const vk::PipelineViewportStateCreateInfo viewport_ci({}, Maxwell::NumViewports, nullptr,
Maxwell::NumViewports, nullptr);
// TODO(Rodrigo): Find out what's the default register value for front face
const vk::PipelineRasterizationStateCreateInfo rasterizer_ci(
{}, rs.depth_clamp_enable, false, vk::PolygonMode::eFill,
rs.cull_enable ? MaxwellToVK::CullFace(rs.cull_face) : vk::CullModeFlagBits::eNone,
rs.cull_enable ? MaxwellToVK::FrontFace(rs.front_face) : vk::FrontFace::eCounterClockwise,
rs.depth_bias_enable, 0.0f, 0.0f, 0.0f, 1.0f);
const vk::PipelineMultisampleStateCreateInfo multisampling_ci(
{}, vk::SampleCountFlagBits::e1, false, 0.0f, nullptr, false, false);
const vk::CompareOp depth_test_compare = ds.depth_test_enable
? MaxwellToVK::ComparisonOp(ds.depth_test_function)
: vk::CompareOp::eAlways;
const vk::PipelineDepthStencilStateCreateInfo depth_stencil_ci(
{}, ds.depth_test_enable, ds.depth_write_enable, depth_test_compare, ds.depth_bounds_enable,
ds.stencil_enable, GetStencilFaceState(ds.front_stencil),
GetStencilFaceState(ds.back_stencil), 0.0f, 0.0f);
std::array<vk::PipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
const std::size_t num_attachments =
std::min(cd.attachments_count, renderpass_params.color_attachments.size());
for (std::size_t i = 0; i < num_attachments; ++i) {
constexpr std::array component_table{
vk::ColorComponentFlagBits::eR, vk::ColorComponentFlagBits::eG,
vk::ColorComponentFlagBits::eB, vk::ColorComponentFlagBits::eA};
const auto& blend = cd.attachments[i];
vk::ColorComponentFlags color_components{};
for (std::size_t j = 0; j < component_table.size(); ++j) {
if (blend.components[j])
color_components |= component_table[j];
}
cb_attachments[i] = vk::PipelineColorBlendAttachmentState(
blend.enable, MaxwellToVK::BlendFactor(blend.src_rgb_func),
MaxwellToVK::BlendFactor(blend.dst_rgb_func),
MaxwellToVK::BlendEquation(blend.rgb_equation),
MaxwellToVK::BlendFactor(blend.src_a_func), MaxwellToVK::BlendFactor(blend.dst_a_func),
MaxwellToVK::BlendEquation(blend.a_equation), color_components);
}
const vk::PipelineColorBlendStateCreateInfo color_blending_ci({}, false, vk::LogicOp::eCopy,
static_cast<u32>(num_attachments),
cb_attachments.data(), {});
constexpr std::array dynamic_states = {
vk::DynamicState::eViewport, vk::DynamicState::eScissor,
vk::DynamicState::eDepthBias, vk::DynamicState::eBlendConstants,
vk::DynamicState::eDepthBounds, vk::DynamicState::eStencilCompareMask,
vk::DynamicState::eStencilWriteMask, vk::DynamicState::eStencilReference};
const vk::PipelineDynamicStateCreateInfo dynamic_state_ci(
{}, static_cast<u32>(dynamic_states.size()), dynamic_states.data());
vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;
std::vector<vk::PipelineShaderStageCreateInfo> shader_stages;
std::size_t module_index = 0;
for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
if (!program[stage]) {
continue;
}
const auto stage_enum = static_cast<Tegra::Engines::ShaderType>(stage);
const auto vk_stage = MaxwellToVK::ShaderStage(stage_enum);
auto& stage_ci = shader_stages.emplace_back(vk::PipelineShaderStageCreateFlags{}, vk_stage,
*modules[module_index++], "main", nullptr);
if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(vk_stage)) {
stage_ci.pNext = &subgroup_size_ci;
}
}
const vk::GraphicsPipelineCreateInfo create_info(
{}, static_cast<u32>(shader_stages.size()), shader_stages.data(), &vertex_input_ci,
&input_assembly_ci, &tessellation_ci, &viewport_ci, &rasterizer_ci, &multisampling_ci,
&depth_stencil_ci, &color_blending_ci, &dynamic_state_ci, *layout, renderpass, 0, {}, 0);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createGraphicsPipelineUnique(nullptr, create_info, nullptr, dld);
}
} // namespace Vulkan


@@ -0,0 +1,90 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <memory>
#include <optional>
#include <unordered_map>
#include <vector>
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct GraphicsPipelineCacheKey;
class VKDescriptorPool;
class VKDevice;
class VKRenderPassCache;
class VKScheduler;
class VKUpdateDescriptorQueue;
using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>;
class VKGraphicsPipeline final {
public:
explicit VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
VKRenderPassCache& renderpass_cache,
const GraphicsPipelineCacheKey& key,
const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
const SPIRVProgram& program);
~VKGraphicsPipeline();
vk::DescriptorSet CommitDescriptorSet();
vk::Pipeline GetHandle() const {
return *pipeline;
}
vk::PipelineLayout GetLayout() const {
return *layout;
}
vk::RenderPass GetRenderPass() const {
return renderpass;
}
private:
UniqueDescriptorSetLayout CreateDescriptorSetLayout(
const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const;
UniquePipelineLayout CreatePipelineLayout() const;
UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate(
const SPIRVProgram& program) const;
std::vector<UniqueShaderModule> CreateShaderModules(const SPIRVProgram& program) const;
UniquePipeline CreatePipeline(const RenderPassParams& renderpass_params,
const SPIRVProgram& program) const;
const VKDevice& device;
VKScheduler& scheduler;
const FixedPipelineState fixed_state;
const u64 hash;
UniqueDescriptorSetLayout descriptor_set_layout;
DescriptorAllocator descriptor_allocator;
VKUpdateDescriptorQueue& update_descriptor_queue;
UniquePipelineLayout layout;
UniqueDescriptorUpdateTemplate descriptor_template;
std::vector<UniqueShaderModule> modules;
vk::RenderPass renderpass;
UniquePipeline pipeline;
};
} // namespace Vulkan


@@ -6,6 +6,7 @@
#include <optional>
#include <tuple>
#include <vector>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
@@ -16,34 +17,32 @@
namespace Vulkan {
// TODO(Rodrigo): Fine tune this number
constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024;
namespace {
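/// Returns the chunk size to allocate: the smallest predefined chunk that fits required_size,
/// or the size aligned up to 256 MiB when it exceeds every predefined chunk.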
u64 GetAllocationChunkSize(u64 required_size) {
static constexpr u64 sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20};
auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required_size);
return it != std::end(sizes) ? *it : Common::AlignUp(required_size, 256ULL << 20);
}
} // Anonymous namespace
class VKMemoryAllocation final {
public:
explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type)
: device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size},
shifted_type{ShiftType(type)}, is_mappable{properties &
vk::MemoryPropertyFlagBits::eHostVisible} {
if (is_mappable) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld));
}
}
vk::MemoryPropertyFlags properties, u64 allocation_size, u32 type)
: device{device}, memory{memory}, properties{properties}, allocation_size{allocation_size},
shifted_type{ShiftType(type)} {}
~VKMemoryAllocation() {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
if (is_mappable)
dev.unmapMemory(memory, dld);
dev.free(memory, nullptr, dld);
}
VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size),
static_cast<u64>(alignment));
auto found = TryFindFreeSection(free_iterator, allocation_size,
static_cast<u64>(commit_size), static_cast<u64>(alignment));
if (!found) {
found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
static_cast<u64>(alignment));
@@ -52,8 +51,7 @@ public:
return nullptr;
}
}
u8* address = is_mappable ? base_address + *found : nullptr;
auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found,
auto commit = std::make_unique<VKMemoryCommitImpl>(device, this, memory, *found,
*found + commit_size);
commits.push_back(commit.get());
@@ -65,12 +63,10 @@ public:
void Free(const VKMemoryCommitImpl* commit) {
ASSERT(commit);
const auto it =
std::find_if(commits.begin(), commits.end(),
[&](const auto& stored_commit) { return stored_commit == commit; });
const auto it = std::find(std::begin(commits), std::end(commits), commit);
if (it == commits.end()) {
LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!");
UNREACHABLE();
UNREACHABLE_MSG("Freeing unallocated commit!");
return;
}
commits.erase(it);
@@ -88,11 +84,11 @@ private:
}
/// A memory allocator, it may return a free region between "start" and "end" with the solicited
/// requeriments.
/// requirements.
std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
u64 iterator = start;
while (iterator + size < end) {
const u64 try_left = Common::AlignUp(iterator, alignment);
u64 iterator = Common::AlignUp(start, alignment);
while (iterator + size <= end) {
const u64 try_left = iterator;
const u64 try_right = try_left + size;
bool overlap = false;
@@ -100,7 +96,7 @@ private:
const auto [commit_left, commit_right] = commit->interval;
if (try_left < commit_right && commit_left < try_right) {
// There's an overlap, continue the search where the overlapping commit ends.
iterator = commit_right;
iterator = Common::AlignUp(commit_right, alignment);
overlap = true;
break;
}
@@ -110,6 +106,7 @@ private:
return try_left;
}
}
// No free regions were found, return an empty optional.
return std::nullopt;
}
@@ -117,12 +114,8 @@ private:
const VKDevice& device; ///< Vulkan device.
const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
const u64 alloc_size; ///< Size of this allocation.
const u64 allocation_size; ///< Size of this allocation.
const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted.
const bool is_mappable; ///< Whether the allocation is mappable.
/// Base address of the mapped pointer.
u8* base_address{};
/// Hints where the next free region is likely going to be.
u64 free_iterator{};
@@ -132,13 +125,15 @@ private:
};
VKMemoryManager::VKMemoryManager(const VKDevice& device)
: device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())},
is_memory_unified{GetMemoryUnified(props)} {}
: device{device}, properties{device.GetPhysical().getMemoryProperties(
device.GetDispatchLoader())},
is_memory_unified{GetMemoryUnified(properties)} {}
VKMemoryManager::~VKMemoryManager() = default;
VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) {
ASSERT(reqs.size < ALLOC_CHUNK_SIZE);
VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirements,
bool host_visible) {
const u64 chunk_size = GetAllocationChunkSize(requirements.size);
// When a host visible commit is asked, search for host visible and coherent, otherwise search
// for a fast device local type.
@@ -147,32 +142,21 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool
? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
: vk::MemoryPropertyFlagBits::eDeviceLocal;
const auto TryCommit = [&]() -> VKMemoryCommit {
for (auto& alloc : allocs) {
if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits))
continue;
if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) {
return commit;
}
}
return {};
};
if (auto commit = TryCommit(); commit) {
if (auto commit = TryAllocCommit(requirements, wanted_properties)) {
return commit;
}
// Commit has failed, allocate more memory.
if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) {
// TODO(Rodrigo): Try to use host memory.
LOG_CRITICAL(Render_Vulkan, "Ran out of memory!");
UNREACHABLE();
if (!AllocMemory(wanted_properties, requirements.memoryTypeBits, chunk_size)) {
// TODO(Rodrigo): Handle these situations in some way like flushing to guest memory.
// Allocation has failed, panic.
UNREACHABLE_MSG("Ran out of VRAM!");
return {};
}
// Commit again, this time it won't fail since there's a fresh allocation above. If it does,
// there's a bug.
auto commit = TryCommit();
auto commit = TryAllocCommit(requirements, wanted_properties);
ASSERT(commit);
return commit;
}
@@ -180,8 +164,7 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool
VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld);
auto commit = Commit(requeriments, host_visible);
auto commit = Commit(dev.getBufferMemoryRequirements(buffer, dld), host_visible);
dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld);
return commit;
}
@@ -189,25 +172,23 @@ VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const auto requeriments = dev.getImageMemoryRequirements(image, dld);
auto commit = Commit(requeriments, host_visible);
auto commit = Commit(dev.getImageMemoryRequirements(image, dld), host_visible);
dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld);
return commit;
}
bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
u64 size) {
const u32 type = [&]() {
for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
const auto flags = props.memoryTypes[type_index].propertyFlags;
const u32 type = [&] {
for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
const auto flags = properties.memoryTypes[type_index].propertyFlags;
if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
// The type matches in type and in the wanted properties.
return type_index;
}
}
LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!");
UNREACHABLE();
return 0u;
UNREACHABLE_MSG("Couldn't find a compatible memory type!");
return 0U;
}();
const auto dev = device.GetLogical();
@@ -216,19 +197,33 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
// Try to allocate found type.
const vk::MemoryAllocateInfo memory_ai(size, type);
vk::DeviceMemory memory;
if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
if (const auto res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
res != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
return false;
}
allocs.push_back(
allocations.push_back(
std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
return true;
}
/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) {
for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) {
if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& requirements,
vk::MemoryPropertyFlags wanted_properties) {
for (auto& allocation : allocations) {
if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) {
continue;
}
if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) {
return commit;
}
}
return {};
}
/*static*/ bool VKMemoryManager::GetMemoryUnified(
const vk::PhysicalDeviceMemoryProperties& properties) {
for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) {
if (!(properties.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
// Memory is considered unified when heaps are device local only.
return false;
}
@@ -236,17 +231,28 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
return true;
}
VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
u8* data, u64 begin, u64 end)
: interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
vk::DeviceMemory memory, u64 begin, u64 end)
: device{device}, interval{begin, end}, memory{memory}, allocation{allocation} {}
VKMemoryCommitImpl::~VKMemoryCommitImpl() {
allocation->Free(this);
}
u8* VKMemoryCommitImpl::GetData() const {
ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit.");
return data;
MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const {
const auto dev = device.GetLogical();
const auto address = reinterpret_cast<u8*>(
dev.mapMemory(memory, interval.first + offset_, size, {}, device.GetDispatchLoader()));
return MemoryMap{this, address};
}
void VKMemoryCommitImpl::Unmap() const {
const auto dev = device.GetLogical();
dev.unmapMemory(memory, device.GetDispatchLoader());
}
MemoryMap VKMemoryCommitImpl::Map() const {
return Map(interval.second - interval.first);
}
} // namespace Vulkan


@@ -12,6 +12,7 @@
namespace Vulkan {
class MemoryMap;
class VKDevice;
class VKMemoryAllocation;
class VKMemoryCommitImpl;
@@ -21,13 +22,14 @@ using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
class VKMemoryManager final {
public:
explicit VKMemoryManager(const VKDevice& device);
VKMemoryManager(const VKMemoryManager&) = delete;
~VKMemoryManager();
/**
* Commits a memory with the specified requeriments.
* @param reqs Requeriments returned from a Vulkan call.
* @param requirements Requirements returned from a Vulkan call.
* @param host_visible Signals the allocator that it *must* use host visible and coherent
* memory. When passing false, it will try to allocate device local memory.
* memory. When passing false, it will try to allocate device local memory.
* @returns A memory commit.
*/
VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible);
@@ -47,25 +49,35 @@ private:
/// Allocates a chunk of memory.
bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
/// Returns true if the device uses an unified memory model.
static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props);
/// Tries to allocate a memory commit.
VKMemoryCommit TryAllocCommit(const vk::MemoryRequirements& requirements,
vk::MemoryPropertyFlags wanted_properties);
const VKDevice& device; ///< Device handler.
const vk::PhysicalDeviceMemoryProperties props; ///< Physical device properties.
const bool is_memory_unified; ///< True if memory model is unified.
std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations.
/// Returns true if the device uses an unified memory model.
static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& properties);
const VKDevice& device; ///< Device handler.
const vk::PhysicalDeviceMemoryProperties properties; ///< Physical device properties.
const bool is_memory_unified; ///< True if memory model is unified.
std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations.
};
class VKMemoryCommitImpl final {
friend VKMemoryAllocation;
friend MemoryMap;
public:
explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data,
u64 begin, u64 end);
explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
vk::DeviceMemory memory, u64 begin, u64 end);
~VKMemoryCommitImpl();
/// Returns the writeable memory map. The commit has to be mappable.
u8* GetData() const;
/// Maps a memory region and returns a pointer to it.
/// It's illegal to have more than one memory map at the same time.
MemoryMap Map(u64 size, u64 offset = 0) const;
/// Maps the whole commit and returns a pointer to it.
/// It's illegal to have more than one memory map at the same time.
MemoryMap Map() const;
/// Returns the Vulkan memory handler.
vk::DeviceMemory GetMemory() const {
@@ -78,10 +90,46 @@ public:
}
private:
/// Unmaps memory.
void Unmap() const;
const VKDevice& device; ///< Vulkan device.
std::pair<u64, u64> interval{}; ///< Interval where the commit exists.
vk::DeviceMemory memory; ///< Vulkan device memory handler.
VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included.
};
/// Holds ownership of a memory map.
class MemoryMap final {
public:
explicit MemoryMap(const VKMemoryCommitImpl* commit, u8* address)
: commit{commit}, address{address} {}
~MemoryMap() {
if (commit) {
commit->Unmap();
}
}
/// Prematurely releases the memory map.
void Release() {
commit->Unmap();
commit = nullptr;
}
/// Returns the address of the memory map.
u8* GetAddress() const {
return address;
}
/// Returns the address of the memory map.
operator u8*() const {
return address;
}
private:
const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit.
u8* address{}; ///< Address to the mapped memory.
};
} // namespace Vulkan


@@ -0,0 +1,395 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <cstddef>
#include <memory>
#include <vector>
#include "common/microprofile.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader/compiler_settings.h"
namespace Vulkan {
MICROPROFILE_DECLARE(Vulkan_PipelineCache);
using Tegra::Engines::ShaderType;
namespace {
constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
VideoCommon::Shader::CompileDepth::FullDecompile};
/// Gets the address for the specified shader stage program
GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
const auto& gpu{system.GPU().Maxwell3D()};
const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
return gpu.regs.code_address.CodeAddress() + shader_config.offset;
}
/// Returns whether the given instruction offset is a scheduler instruction
constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
// Sched instructions appear once every 4 instructions.
constexpr std::size_t SchedPeriod = 4;
const std::size_t absolute_offset = offset - main_offset;
return (absolute_offset % SchedPeriod) == 0;
}
/// Calculates the size of a program stream
std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
const std::size_t start_offset = is_compute ? 0 : 10;
// This is the encoded version of BRA that jumps to itself. All Nvidia
// shaders end with one.
constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
std::size_t offset = start_offset;
while (offset < program.size()) {
const u64 instruction = program[offset];
if (!IsSchedInstruction(offset, start_offset)) {
if ((instruction & mask) == self_jumping_branch) {
// End on Maxwell's "nop" instruction
break;
}
if (instruction == 0) {
break;
}
}
++offset;
}
// The last instruction is included in the program size
return std::min(offset + 1, program.size());
}
/// Gets the shader program code from memory for the specified address
ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
const u8* host_ptr, bool is_compute) {
ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
ASSERT_OR_EXECUTE(host_ptr != nullptr, {
std::fill(program_code.begin(), program_code.end(), 0);
return program_code;
});
memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
program_code.size() * sizeof(u64));
program_code.resize(CalculateProgramSize(program_code, is_compute));
return program_code;
}
constexpr std::size_t GetStageFromProgram(std::size_t program) {
return program == 0 ? 0 : program - 1;
}
constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) {
return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program)));
}
ShaderType GetShaderType(Maxwell::ShaderProgram program) {
switch (program) {
case Maxwell::ShaderProgram::VertexB:
return ShaderType::Vertex;
case Maxwell::ShaderProgram::TesselationControl:
return ShaderType::TesselationControl;
case Maxwell::ShaderProgram::TesselationEval:
return ShaderType::TesselationEval;
case Maxwell::ShaderProgram::Geometry:
return ShaderType::Geometry;
case Maxwell::ShaderProgram::Fragment:
return ShaderType::Fragment;
default:
UNIMPLEMENTED_MSG("program={}", static_cast<u32>(program));
return ShaderType::Vertex;
}
}
u32 FillDescriptorLayout(const ShaderEntries& entries,
std::vector<vk::DescriptorSetLayoutBinding>& bindings,
Maxwell::ShaderProgram program_type, u32 base_binding) {
const ShaderType stage = GetStageFromProgram(program_type);
const vk::ShaderStageFlags stage_flags = MaxwellToVK::ShaderStage(stage);
u32 binding = base_binding;
const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) {
for (std::size_t i = 0; i < num_entries; ++i) {
bindings.emplace_back(binding++, descriptor_type, 1, stage_flags, nullptr);
}
};
AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
AddBindings(vk::DescriptorType::eStorageImage, entries.images.size());
return binding;
}
} // Anonymous namespace
CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
ProgramCode program_code, u32 main_offset)
: RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)},
shader_ir{this->program_code, main_offset, compiler_settings, locker},
entries{GenerateShaderEntries(shader_ir)} {}
CachedShader::~CachedShader() = default;
Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
Core::System& system, Tegra::Engines::ShaderType stage) {
if (stage == Tegra::Engines::ShaderType::Compute) {
return system.GPU().KeplerCompute();
} else {
return system.GPU().Maxwell3D();
}
}
VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue)
: RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
renderpass_cache(device) {}
VKPipelineCache::~VKPipelineCache() = default;
std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
const auto& gpu = system.GPU().Maxwell3D();
auto& dirty = system.GPU().Maxwell3D().dirty.shaders;
if (!dirty) {
return last_shaders;
}
dirty = false;
std::array<Shader, Maxwell::MaxShaderProgram> shaders;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = gpu.regs.shader_config[index];
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
// Skip stages that are not enabled
if (!gpu.regs.IsShaderConfigEnabled(index)) {
continue;
}
auto& memory_manager{system.GPU().MemoryManager()};
const GPUVAddr program_addr{GetShaderAddress(system, program)};
const auto host_ptr{memory_manager.GetPointer(program_addr)};
auto shader = TryGet(host_ptr);
if (!shader) {
// No shader found - create a new one
constexpr u32 stage_offset = 10;
const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
host_ptr, std::move(code), stage_offset);
Register(shader);
}
shaders[index] = std::move(shader);
}
return last_shaders = shaders;
}
VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
if (last_graphics_pipeline && last_graphics_key == key) {
return *last_graphics_pipeline;
}
last_graphics_key = key;
const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
auto& entry = pair->second;
if (is_cache_miss) {
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
const auto [program, bindings] = DecompileShaders(key);
entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
update_descriptor_queue, renderpass_cache, key,
bindings, program);
}
return *(last_graphics_pipeline = entry.get());
}
VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
const auto [pair, is_cache_miss] = compute_cache.try_emplace(key);
auto& entry = pair->second;
if (!is_cache_miss) {
return *entry;
}
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
auto& memory_manager = system.GPU().MemoryManager();
const auto program_addr = key.shader;
const auto host_ptr = memory_manager.GetPointer(program_addr);
auto shader = TryGet(host_ptr);
if (!shader) {
// No shader found - create a new one
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
constexpr u32 kernel_main_offset = 0;
shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
program_addr, *cpu_addr, host_ptr, std::move(code),
kernel_main_offset);
Register(shader);
}
Specialization specialization;
specialization.workgroup_size = key.workgroup_size;
specialization.shared_memory_size = key.shared_memory_size;
const SPIRVShader spirv_shader{
Decompile(device, shader->GetIR(), ShaderType::Compute, specialization),
shader->GetEntries()};
entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
update_descriptor_queue, spirv_shader);
return *entry;
}
void VKPipelineCache::Unregister(const Shader& shader) {
bool finished = false;
const auto Finish = [&] {
// TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
// flush.
if (finished) {
return;
}
finished = true;
scheduler.Finish();
};
const GPUVAddr invalidated_addr = shader->GetGpuAddr();
for (auto it = graphics_cache.begin(); it != graphics_cache.end();) {
auto& entry = it->first;
if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) ==
entry.shaders.end()) {
++it;
continue;
}
Finish();
it = graphics_cache.erase(it);
}
for (auto it = compute_cache.begin(); it != compute_cache.end();) {
auto& entry = it->first;
if (entry.shader != invalidated_addr) {
++it;
continue;
}
Finish();
it = compute_cache.erase(it);
}
RasterizerCache::Unregister(shader);
}
std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>>
VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
const auto& fixed_state = key.fixed_state;
auto& memory_manager = system.GPU().MemoryManager();
const auto& gpu = system.GPU().Maxwell3D();
Specialization specialization;
specialization.primitive_topology = fixed_state.input_assembly.topology;
if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) {
ASSERT(fixed_state.input_assembly.point_size != 0.0f);
specialization.point_size = fixed_state.input_assembly.point_size;
}
for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
}
specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
specialization.tessellation.primitive = fixed_state.tessellation.primitive;
specialization.tessellation.spacing = fixed_state.tessellation.spacing;
specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
for (const auto& rt : key.renderpass_params.color_attachments) {
specialization.enabled_rendertargets.set(rt.index);
}
SPIRVProgram program;
std::vector<vk::DescriptorSetLayoutBinding> bindings;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
// Skip stages that are not enabled
if (!gpu.regs.IsShaderConfigEnabled(index)) {
continue;
}
const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
const auto shader = TryGet(host_ptr);
ASSERT(shader);
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
const auto program_type = GetShaderType(program_enum);
const auto& entries = shader->GetEntries();
program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization),
entries};
if (program_enum == Maxwell::ShaderProgram::VertexA) {
// VertexB was combined with VertexA, so we skip the VertexB iteration
++index;
}
const u32 old_binding = specialization.base_binding;
specialization.base_binding =
FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding);
ASSERT(old_binding + entries.NumBindings() == specialization.base_binding);
}
return {std::move(program), std::move(bindings)};
}
void FillDescriptorUpdateTemplateEntries(
const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) {
static constexpr auto entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
const auto AddEntry = [&](vk::DescriptorType descriptor_type, std::size_t count_) {
const u32 count = static_cast<u32>(count_);
if (descriptor_type == vk::DescriptorType::eUniformTexelBuffer &&
device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) {
// Nvidia has a bug where updating multiple uniform texels at once causes the driver to
// crash.
for (u32 i = 0; i < count; ++i) {
template_entries.emplace_back(binding + i, 0, 1, descriptor_type,
offset + i * entry_size, entry_size);
}
} else if (count != 0) {
template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size);
}
offset += count * entry_size;
binding += count;
};
AddEntry(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
AddEntry(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
AddEntry(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
AddEntry(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
AddEntry(vk::DescriptorType::eStorageImage, entries.images.size());
}
} // namespace Vulkan


@@ -0,0 +1,200 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstddef>
#include <memory>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>
#include <boost/functional/hash.hpp>
#include "common/common_types.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/shader/const_buffer_locker.h"
#include "video_core/shader/shader_ir.h"
#include "video_core/surface.h"
namespace Core {
class System;
}
namespace Vulkan {
class RasterizerVulkan;
class VKComputePipeline;
class VKDescriptorPool;
class VKDevice;
class VKFence;
class VKScheduler;
class VKUpdateDescriptorQueue;
class CachedShader;
using Shader = std::shared_ptr<CachedShader>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using ProgramCode = std::vector<u64>;
struct GraphicsPipelineCacheKey {
FixedPipelineState fixed_state;
std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
RenderPassParams renderpass_params;
std::size_t Hash() const noexcept {
std::size_t hash = fixed_state.Hash();
for (const auto& shader : shaders) {
boost::hash_combine(hash, shader);
}
boost::hash_combine(hash, renderpass_params.Hash());
return hash;
}
bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
return std::tie(fixed_state, shaders, renderpass_params) ==
std::tie(rhs.fixed_state, rhs.shaders, rhs.renderpass_params);
}
};
struct ComputePipelineCacheKey {
GPUVAddr shader{};
u32 shared_memory_size{};
std::array<u32, 3> workgroup_size{};
std::size_t Hash() const noexcept {
return static_cast<std::size_t>(shader) ^
((static_cast<std::size_t>(shared_memory_size) >> 7) << 40) ^
static_cast<std::size_t>(workgroup_size[0]) ^
(static_cast<std::size_t>(workgroup_size[1]) << 16) ^
(static_cast<std::size_t>(workgroup_size[2]) << 24);
}
bool operator==(const ComputePipelineCacheKey& rhs) const noexcept {
return std::tie(shader, shared_memory_size, workgroup_size) ==
std::tie(rhs.shader, rhs.shared_memory_size, rhs.workgroup_size);
}
};
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::GraphicsPipelineCacheKey> {
std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
return k.Hash();
}
};
template <>
struct hash<Vulkan::ComputePipelineCacheKey> {
std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std
namespace Vulkan {
class CachedShader final : public RasterizerCacheObject {
public:
explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset);
~CachedShader();
GPUVAddr GetGpuAddr() const {
return gpu_addr;
}
VAddr GetCpuAddr() const override {
return cpu_addr;
}
std::size_t GetSizeInBytes() const override {
return program_code.size() * sizeof(u64);
}
VideoCommon::Shader::ShaderIR& GetIR() {
return shader_ir;
}
const VideoCommon::Shader::ShaderIR& GetIR() const {
return shader_ir;
}
const ShaderEntries& GetEntries() const {
return entries;
}
private:
static Tegra::Engines::ConstBufferEngineInterface& GetEngine(Core::System& system,
Tegra::Engines::ShaderType stage);
GPUVAddr gpu_addr{};
VAddr cpu_addr{};
ProgramCode program_code;
VideoCommon::Shader::ConstBufferLocker locker;
VideoCommon::Shader::ShaderIR shader_ir;
ShaderEntries entries;
};
class VKPipelineCache final : public RasterizerCache<Shader> {
public:
explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue);
~VKPipelineCache();
std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
protected:
void Unregister(const Shader& shader) override;
void FlushObjectInner(const Shader& object) override {}
private:
std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>> DecompileShaders(
const GraphicsPipelineCacheKey& key);
Core::System& system;
const VKDevice& device;
VKScheduler& scheduler;
VKDescriptorPool& descriptor_pool;
VKUpdateDescriptorQueue& update_descriptor_queue;
VKRenderPassCache renderpass_cache;
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
GraphicsPipelineCacheKey last_graphics_key;
VKGraphicsPipeline* last_graphics_pipeline = nullptr;
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
graphics_cache;
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
};
void FillDescriptorUpdateTemplateEntries(
const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries);
} // namespace Vulkan
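A hedged usage sketch of the pipeline cache interface declared above; the cache key and the surrounding draw code are assumptions, not part of this change.
// Minimal sketch (hypothetical call site inside the rasterizer's draw path).
void PrepareDraw(VKPipelineCache& pipeline_cache, const GraphicsPipelineCacheKey& key) {
    // Refreshes the shader handles only when Maxwell3D marked them dirty.
    [[maybe_unused]] const auto shaders = pipeline_cache.GetShaders();
    // Returns a cached pipeline unless fixed state, shader addresses or renderpass params changed.
    [[maybe_unused]] VKGraphicsPipeline& pipeline = pipeline_cache.GetGraphicsPipeline(key);
    // ... bind the pipeline and record the draw through the scheduler ...
}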


@@ -0,0 +1,13 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/rasterizer_interface.h"
namespace Vulkan {
class RasterizerVulkan : public VideoCore::RasterizerInterface {};
} // namespace Vulkan


@@ -0,0 +1,100 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include <vector>
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
namespace Vulkan {
VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {}
VKRenderPassCache::~VKRenderPassCache() = default;
vk::RenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) {
const auto [pair, is_cache_miss] = cache.try_emplace(params);
auto& entry = pair->second;
if (is_cache_miss) {
entry = CreateRenderPass(params);
}
return *entry;
}
UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const {
std::vector<vk::AttachmentDescription> descriptors;
std::vector<vk::AttachmentReference> color_references;
for (std::size_t rt = 0; rt < params.color_attachments.size(); ++rt) {
const auto attachment = params.color_attachments[rt];
const auto format =
MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, attachment.pixel_format);
ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
static_cast<u32>(attachment.pixel_format));
// TODO(Rodrigo): Add eMayAlias when it's needed.
const auto color_layout = attachment.is_texception
? vk::ImageLayout::eGeneral
: vk::ImageLayout::eColorAttachmentOptimal;
descriptors.emplace_back(vk::AttachmentDescriptionFlagBits::eMayAlias, format.format,
vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad,
vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare,
vk::AttachmentStoreOp::eDontCare, color_layout, color_layout);
color_references.emplace_back(static_cast<u32>(rt), color_layout);
}
vk::AttachmentReference zeta_attachment_ref;
if (params.has_zeta) {
const auto format =
MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.zeta_pixel_format);
ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
static_cast<u32>(params.zeta_pixel_format));
const auto zeta_layout = params.zeta_texception
? vk::ImageLayout::eGeneral
: vk::ImageLayout::eDepthStencilAttachmentOptimal;
descriptors.emplace_back(vk::AttachmentDescriptionFlags{}, format.format,
vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad,
vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eLoad,
vk::AttachmentStoreOp::eStore, zeta_layout, zeta_layout);
zeta_attachment_ref =
vk::AttachmentReference(static_cast<u32>(params.color_attachments.size()), zeta_layout);
}
const vk::SubpassDescription subpass_description(
{}, vk::PipelineBindPoint::eGraphics, 0, nullptr, static_cast<u32>(color_references.size()),
color_references.data(), nullptr, params.has_zeta ? &zeta_attachment_ref : nullptr, 0,
nullptr);
vk::AccessFlags access;
vk::PipelineStageFlags stage;
if (!color_references.empty()) {
access |=
vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite;
stage |= vk::PipelineStageFlagBits::eColorAttachmentOutput;
}
if (params.has_zeta) {
access |= vk::AccessFlagBits::eDepthStencilAttachmentRead |
vk::AccessFlagBits::eDepthStencilAttachmentWrite;
stage |= vk::PipelineStageFlagBits::eLateFragmentTests;
}
const vk::SubpassDependency subpass_dependency(VK_SUBPASS_EXTERNAL, 0, stage, stage, {}, access,
{});
const vk::RenderPassCreateInfo create_info({}, static_cast<u32>(descriptors.size()),
descriptors.data(), 1, &subpass_description, 1,
&subpass_dependency);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
return dev.createRenderPassUnique(create_info, nullptr, dld);
}
} // namespace Vulkan


@@ -0,0 +1,97 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <tuple>
#include <unordered_map>
#include <boost/container/static_vector.hpp>
#include <boost/functional/hash.hpp>
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/surface.h"
namespace Vulkan {
class VKDevice;
// TODO(Rodrigo): Optimize this structure for faster hashing
struct RenderPassParams {
struct ColorAttachment {
u32 index = 0;
VideoCore::Surface::PixelFormat pixel_format = VideoCore::Surface::PixelFormat::Invalid;
bool is_texception = false;
std::size_t Hash() const noexcept {
return static_cast<std::size_t>(pixel_format) |
static_cast<std::size_t>(is_texception) << 6 |
static_cast<std::size_t>(index) << 7;
}
bool operator==(const ColorAttachment& rhs) const noexcept {
return std::tie(index, pixel_format, is_texception) ==
std::tie(rhs.index, rhs.pixel_format, rhs.is_texception);
}
};
boost::container::static_vector<ColorAttachment,
Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
color_attachments{};
// TODO(Rodrigo): Unify has_zeta into zeta_pixel_format and zeta_component_type.
VideoCore::Surface::PixelFormat zeta_pixel_format = VideoCore::Surface::PixelFormat::Invalid;
bool has_zeta = false;
bool zeta_texception = false;
std::size_t Hash() const noexcept {
std::size_t hash = 0;
for (const auto& rt : color_attachments) {
boost::hash_combine(hash, rt.Hash());
}
boost::hash_combine(hash, zeta_pixel_format);
boost::hash_combine(hash, has_zeta);
boost::hash_combine(hash, zeta_texception);
return hash;
}
bool operator==(const RenderPassParams& rhs) const {
return std::tie(color_attachments, zeta_pixel_format, has_zeta, zeta_texception) ==
std::tie(rhs.color_attachments, rhs.zeta_pixel_format, rhs.has_zeta,
rhs.zeta_texception);
}
};
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::RenderPassParams> {
std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept {
return k.Hash();
}
};
} // namespace std
namespace Vulkan {
class VKRenderPassCache final {
public:
explicit VKRenderPassCache(const VKDevice& device);
~VKRenderPassCache();
vk::RenderPass GetRenderPass(const RenderPassParams& params);
private:
UniqueRenderPass CreateRenderPass(const RenderPassParams& params) const;
const VKDevice& device;
std::unordered_map<RenderPassParams, UniqueRenderPass> cache;
};
} // namespace Vulkan
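A hedged sketch of querying the render pass cache declared above; the single color attachment and its pixel format are illustrative assumptions.
// Minimal sketch (hypothetical helper; ABGR8U is only an example format).
vk::RenderPass GetColorOnlyRenderPass(VKRenderPassCache& renderpass_cache) {
    RenderPassParams params;
    RenderPassParams::ColorAttachment attachment;
    attachment.index = 0;
    attachment.pixel_format = VideoCore::Surface::PixelFormat::ABGR8U;
    params.color_attachments.push_back(attachment);
    // Identical parameters hash to the same entry, so repeated calls reuse one vk::RenderPass.
    return renderpass_cache.GetRenderPass(params);
}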


@@ -46,9 +46,9 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc)
{}, MaxwellToVK::Sampler::Filter(tsc.mag_filter),
MaxwellToVK::Sampler::Filter(tsc.min_filter),
MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
MaxwellToVK::Sampler::WrapMode(tsc.wrap_u, tsc.mag_filter),
MaxwellToVK::Sampler::WrapMode(tsc.wrap_v, tsc.mag_filter),
MaxwellToVK::Sampler::WrapMode(tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(),
MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(),
has_anisotropy, max_anisotropy, tsc.depth_compare_enabled,
MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(),
tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack),


@@ -954,6 +954,10 @@ private:
Expression Visit(const Node& node) {
if (const auto operation = std::get_if<OperationNode>(&*node)) {
if (const auto amend_index = operation->GetAmendIndex()) {
[[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
ASSERT(type == Type::Void);
}
const auto operation_index = static_cast<std::size_t>(operation->GetCode());
const auto decompiler = operation_decompilers[operation_index];
if (decompiler == nullptr) {
@@ -1142,6 +1146,10 @@ private:
}
if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
if (const auto amend_index = conditional->GetAmendIndex()) {
[[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
ASSERT(type == Type::Void);
}
// It's invalid to call conditional on nested nodes, use an operation instead
const Id true_label = OpLabel();
const Id skip_label = OpLabel();


@@ -0,0 +1,34 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include <memory>
#include <vector>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
namespace Vulkan {
UniqueShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) {
// Avoid undefined behavior by copying to a staging allocation
ASSERT(code_size % sizeof(u32) == 0);
const auto data = std::make_unique<u32[]>(code_size / sizeof(u32));
std::memcpy(data.get(), code_data, code_size);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
const vk::ShaderModuleCreateInfo shader_ci({}, code_size, data.get());
vk::ShaderModule shader_module;
if (dev.createShaderModule(&shader_ci, nullptr, &shader_module, dld) != vk::Result::eSuccess) {
UNREACHABLE_MSG("Shader module failed to build!");
}
return UniqueShaderModule(shader_module, vk::ObjectDestroy(dev, nullptr, dld));
}
} // namespace Vulkan


@@ -0,0 +1,17 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
namespace Vulkan {
class VKDevice;
UniqueShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data);
} // namespace Vulkan


@@ -3,86 +3,144 @@
// Refer to the license.txt file included.
#include <algorithm>
#include <memory>
#include <optional>
#include <tuple>
#include <vector>
#include "common/alignment.h"
#include "common/assert.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
namespace Vulkan {
namespace {
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
: device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
pipeline_stage} {
CreateBuffers(memory_manager, usage);
ReserveWatches(WATCHES_INITIAL_RESERVE);
constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024;
std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter,
vk::MemoryPropertyFlags wanted) {
const auto properties = device.GetPhysical().getMemoryProperties(device.GetDispatchLoader());
for (u32 i = 0; i < properties.memoryTypeCount; i++) {
if (!(filter & (1 << i))) {
continue;
}
if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) {
return i;
}
}
return {};
}
} // Anonymous namespace
VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
vk::BufferUsageFlags usage)
: device{device}, scheduler{scheduler} {
CreateBuffers(usage);
ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
}
VKStreamBuffer::~VKStreamBuffer() = default;
std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
ASSERT(size <= buffer_size);
std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
ASSERT(size <= STREAM_BUFFER_SIZE);
mapped_size = size;
if (offset + size > buffer_size) {
// The buffer would overflow, save the amount of used buffers, signal an invalidation and
// reset the state.
invalidation_mark = used_watches;
used_watches = 0;
if (alignment > 0) {
offset = Common::AlignUp(offset, alignment);
}
WaitPendingOperations(offset);
bool invalidated = false;
if (offset + size > STREAM_BUFFER_SIZE) {
// The buffer would overflow, save the amount of used watches and reset the state.
invalidation_mark = current_watch_cursor;
current_watch_cursor = 0;
offset = 0;
}
return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
}
// Swap watches and reset waiting cursors.
std::swap(previous_watches, current_watches);
wait_cursor = 0;
wait_bound = 0;
void VKStreamBuffer::Send(u64 size) {
ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
if (invalidation_mark) {
// TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
// Ensure that we don't wait for uncommitted fences.
scheduler.Flush();
std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
[&](auto& resource) { resource->Wait(); });
invalidation_mark = std::nullopt;
invalidated = true;
}
if (used_watches + 1 >= watches.size()) {
// Ensure that there are enough watches.
ReserveWatches(WATCHES_RESERVE_CHUNK);
}
// Add a watch for this allocation.
watches[used_watches++]->Watch(scheduler.GetFence());
offset += size;
}
void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
nullptr);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
commit = memory_manager.Commit(*buffer, true);
mapped_pointer = commit->GetData();
const auto pointer = reinterpret_cast<u8*>(dev.mapMemory(*memory, offset, size, {}, dld));
return {pointer, offset, invalidated};
}
void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
const std::size_t previous_size = watches.size();
watches.resize(previous_size + grow_size);
std::generate(watches.begin() + previous_size, watches.end(),
[]() { return std::make_unique<VKFenceWatch>(); });
void VKStreamBuffer::Unmap(u64 size) {
ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
const auto dev = device.GetLogical();
dev.unmapMemory(*memory, device.GetDispatchLoader());
offset += size;
if (current_watch_cursor + 1 >= current_watches.size()) {
// Ensure that there are enough watches.
ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
}
auto& watch = current_watches[current_watch_cursor++];
watch.upper_bound = offset;
watch.fence.Watch(scheduler.GetFence());
}
void VKStreamBuffer::CreateBuffers(vk::BufferUsageFlags usage) {
const vk::BufferCreateInfo buffer_ci({}, STREAM_BUFFER_SIZE, usage, vk::SharingMode::eExclusive,
0, nullptr);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
const auto requirements = dev.getBufferMemoryRequirements(*buffer, dld);
// Prefer device local host visible allocations (this should hit AMD's pinned memory).
auto type = FindMemoryType(device, requirements.memoryTypeBits,
vk::MemoryPropertyFlagBits::eHostVisible |
vk::MemoryPropertyFlagBits::eHostCoherent |
vk::MemoryPropertyFlagBits::eDeviceLocal);
if (!type) {
// Otherwise search for a host visible allocation.
type = FindMemoryType(device, requirements.memoryTypeBits,
vk::MemoryPropertyFlagBits::eHostVisible |
vk::MemoryPropertyFlagBits::eHostCoherent);
ASSERT_MSG(type, "No host visible and coherent memory type found");
}
const vk::MemoryAllocateInfo alloc_ci(requirements.size, *type);
memory = dev.allocateMemoryUnique(alloc_ci, nullptr, dld);
dev.bindBufferMemory(*buffer, *memory, 0, dld);
}
void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
watches.resize(watches.size() + grow_size);
}
void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
if (!invalidation_mark) {
return;
}
while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) {
auto& watch = previous_watches[wait_cursor];
wait_bound = watch.upper_bound;
watch.fence.Wait();
++wait_cursor;
}
}
} // namespace Vulkan


@@ -4,28 +4,24 @@
#pragma once
#include <memory>
#include <optional>
#include <tuple>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
namespace Vulkan {
class VKDevice;
class VKFence;
class VKFenceWatch;
class VKResourceManager;
class VKScheduler;
class VKStreamBuffer {
class VKStreamBuffer final {
public:
explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
vk::BufferUsageFlags usage);
~VKStreamBuffer();
/**
@@ -34,39 +30,47 @@ public:
* @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
* offset and a boolean that's true when buffer has been invalidated.
*/
std::tuple<u8*, u64, bool> Reserve(u64 size);
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
void Send(u64 size);
void Unmap(u64 size);
vk::Buffer GetBuffer() const {
vk::Buffer GetHandle() const {
return *buffer;
}
private:
struct Watch final {
VKFenceWatch fence;
u64 upper_bound{};
};
/// Creates Vulkan buffer handles committing the required memory.
void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
void CreateBuffers(vk::BufferUsageFlags usage);
/// Increases the number of watches available.
void ReserveWatches(std::size_t grow_size);
void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
void WaitPendingOperations(u64 requested_upper_bound);
const VKDevice& device; ///< Vulkan device manager.
VKScheduler& scheduler; ///< Command scheduler.
const u64 buffer_size; ///< Total size of the stream buffer.
const vk::AccessFlags access; ///< Access usage of this stream buffer.
const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
UniqueBuffer buffer; ///< Mapped buffer.
VKMemoryCommit commit; ///< Memory commit.
u8* mapped_pointer{}; ///< Pointer to the host visible commit
UniqueBuffer buffer; ///< Mapped buffer.
UniqueDeviceMemory memory; ///< Memory allocation.
u64 offset{}; ///< Buffer iterator.
u64 mapped_size{}; ///< Size reserved for the current copy.
std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
std::optional<std::size_t>
invalidation_mark{}; ///< Number of watches used in the current invalidation.
std::vector<Watch> current_watches; ///< Watches recorded in the current iteration.
std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation.
std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle.
std::vector<Watch> previous_watches; ///< Watches used in the previous iteration.
std::size_t wait_cursor{}; ///< Last watch being waited for completion.
u64 wait_bound{}; ///< Highest offset being watched for completion.
};
} // namespace Vulkan
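A hedged sketch of the new Map/Unmap protocol declared above; the caller, the alignment and the data being uploaded are assumptions.
#include <cstring> // assumed for std::memcpy
// Minimal sketch (hypothetical upload helper), not part of this change.
u64 UploadToStreamBuffer(VKStreamBuffer& stream_buffer, const u8* data, u64 size) {
    // Map returns the write pointer, the offset inside the buffer and an invalidation flag.
    const auto [pointer, offset, invalidated] = stream_buffer.Map(size, 4);
    if (invalidated) {
        // Regions uploaded before the invalidation are stale and would have to be re-uploaded.
    }
    std::memcpy(pointer, data, size);
    // Report only the bytes actually written; this records a fence watch for the region.
    stream_buffer.Unmap(size);
    // The returned offset is what gets bound later against stream_buffer.GetHandle().
    return offset;
}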


@@ -0,0 +1,57 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <variant>
#include <boost/container/static_vector.hpp>
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
namespace Vulkan {
VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler)
: device{device}, scheduler{scheduler} {}
VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default;
void VKUpdateDescriptorQueue::TickFrame() {
payload.clear();
}
void VKUpdateDescriptorQueue::Acquire() {
entries.clear();
}
void VKUpdateDescriptorQueue::Send(vk::DescriptorUpdateTemplate update_template,
vk::DescriptorSet set) {
if (payload.size() + entries.size() >= payload.max_size()) {
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
scheduler.WaitWorker();
payload.clear();
}
const auto payload_start = payload.data() + payload.size();
for (const auto& entry : entries) {
if (const auto image = std::get_if<vk::DescriptorImageInfo>(&entry)) {
payload.push_back(*image);
} else if (const auto buffer = std::get_if<Buffer>(&entry)) {
payload.emplace_back(*buffer->buffer, buffer->offset, buffer->size);
} else if (const auto texel = std::get_if<vk::BufferView>(&entry)) {
payload.push_back(*texel);
} else {
UNREACHABLE();
}
}
scheduler.Record([dev = device.GetLogical(), payload_start, set,
update_template]([[maybe_unused]] auto cmdbuf, auto& dld) {
dev.updateDescriptorSetWithTemplate(set, update_template, payload_start, dld);
});
}
} // namespace Vulkan


@@ -0,0 +1,86 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <type_traits>
#include <variant>
#include <boost/container/static_vector.hpp>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
namespace Vulkan {
class VKDevice;
class VKScheduler;
class DescriptorUpdateEntry {
public:
explicit DescriptorUpdateEntry() : image{} {}
DescriptorUpdateEntry(vk::DescriptorImageInfo image) : image{image} {}
DescriptorUpdateEntry(vk::Buffer buffer, vk::DeviceSize offset, vk::DeviceSize size)
: buffer{buffer, offset, size} {}
DescriptorUpdateEntry(vk::BufferView texel_buffer) : texel_buffer{texel_buffer} {}
private:
union {
vk::DescriptorImageInfo image;
vk::DescriptorBufferInfo buffer;
vk::BufferView texel_buffer;
};
};
class VKUpdateDescriptorQueue final {
public:
explicit VKUpdateDescriptorQueue(const VKDevice& device, VKScheduler& scheduler);
~VKUpdateDescriptorQueue();
void TickFrame();
void Acquire();
void Send(vk::DescriptorUpdateTemplate update_template, vk::DescriptorSet set);
void AddSampledImage(vk::Sampler sampler, vk::ImageView image_view) {
entries.emplace_back(vk::DescriptorImageInfo{sampler, image_view, {}});
}
void AddImage(vk::ImageView image_view) {
entries.emplace_back(vk::DescriptorImageInfo{{}, image_view, {}});
}
void AddBuffer(const vk::Buffer* buffer, u64 offset, std::size_t size) {
entries.push_back(Buffer{buffer, offset, size});
}
void AddTexelBuffer(vk::BufferView texel_buffer) {
entries.emplace_back(texel_buffer);
}
vk::ImageLayout* GetLastImageLayout() {
return &std::get<vk::DescriptorImageInfo>(entries.back()).imageLayout;
}
private:
struct Buffer {
const vk::Buffer* buffer{};
u64 offset{};
std::size_t size{};
};
using Variant = std::variant<vk::DescriptorImageInfo, Buffer, vk::BufferView>;
// Old gcc versions don't consider this trivially copyable.
// static_assert(std::is_trivially_copyable_v<Variant>);
const VKDevice& device;
VKScheduler& scheduler;
boost::container::static_vector<Variant, 0x400> entries;
boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload;
};
} // namespace Vulkan
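A hedged sketch of the per-draw descriptor update flow; the template, set and resources are assumptions, and the Add* call order is assumed to match the descriptor set layout.
// Minimal sketch (hypothetical call site), not part of this change.
void UpdateDescriptors(VKUpdateDescriptorQueue& update_queue,
                       vk::DescriptorUpdateTemplate update_template, vk::DescriptorSet set,
                       const vk::Buffer& const_buffer, vk::Sampler sampler, vk::ImageView view) {
    update_queue.Acquire();                      // Start a fresh entry list for this draw.
    update_queue.AddBuffer(&const_buffer, 0, 0x100);
    update_queue.AddSampledImage(sampler, view);
    *update_queue.GetLastImageLayout() = vk::ImageLayout::eShaderReadOnlyOptimal;
    update_queue.Send(update_template, set);     // Defers the template update to the worker thread.
}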


@@ -65,7 +65,7 @@ struct BlockInfo {
struct CFGRebuildState {
explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker)
: program_code{program_code}, start{start}, locker{locker} {}
: program_code{program_code}, locker{locker}, start{start} {}
const ProgramCode& program_code;
ConstBufferLocker& locker;


@@ -6,6 +6,7 @@
#include <vector>
#include <fmt/format.h>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
@@ -22,34 +23,39 @@ using Tegra::Shader::Register;
namespace {
u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) {
bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
uniform_type == Tegra::Shader::UniformType::UnsignedShort;
}
u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
switch (uniform_type) {
case Tegra::Shader::UniformType::UnsignedByte:
case Tegra::Shader::UniformType::Single:
return 1;
case Tegra::Shader::UniformType::Double:
return 2;
case Tegra::Shader::UniformType::Quad:
case Tegra::Shader::UniformType::UnsignedQuad:
return 4;
return 0b11;
case Tegra::Shader::UniformType::UnsignedShort:
return 0b10;
default:
UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
return 1;
UNREACHABLE();
return 0;
}
}
u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) {
u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
switch (uniform_type) {
case Tegra::Shader::UniformType::UnsignedByte:
return 8;
case Tegra::Shader::UniformType::UnsignedShort:
return 16;
case Tegra::Shader::UniformType::Single:
return 1;
return 32;
case Tegra::Shader::UniformType::Double:
return 2;
return 64;
case Tegra::Shader::UniformType::Quad:
case Tegra::Shader::UniformType::UnsignedQuad:
return 4;
return 128;
default:
UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
return 1;
return 32;
}
}
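As a hedged worked example of what these helpers enable: for an 8-bit access GetUnalignedMask returns 0b11, so the bit offset inside the aligned 32-bit word is (address & 0b11) * 8, and GetMemorySize gives the 8-bit width of the inserted or extracted field. The standalone sketch below mirrors that arithmetic on the CPU; it is an illustration, not emitted IR.
// Minimal sketch mirroring the UBitfieldInsert path used for an unaligned byte store.
u32 EmulateUnalignedByteStore(u32 aligned_word, u32 address, u32 value) {
    const u32 bit_offset = (address & 0b11u) * 8; // GetUnalignedMask(UnsignedByte) == 0b11
    const u32 field_mask = 0xFFu << bit_offset;   // 8 bits == GetMemorySize(UnsignedByte)
    return (aligned_word & ~field_mask) | ((value & 0xFFu) << bit_offset);
}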
@@ -184,9 +190,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}();
const auto [real_address_base, base_address, descriptor] =
TrackGlobalMemory(bb, instr, false);
TrackGlobalMemory(bb, instr, true, false);
const u32 count = GetLdgMemorySize(type);
const u32 size = GetMemorySize(type);
const u32 count = Common::AlignUp(size, 32) / 32;
if (!real_address_base || !base_address) {
// Tracking failed, load zeroes.
for (u32 i = 0; i < count; ++i) {
@@ -200,14 +207,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
if (type == Tegra::Shader::UniformType::UnsignedByte) {
// To handle unaligned loads get the byte used to dereferenced global memory
// and extract that byte from the loaded uint32.
Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3));
byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3));
// To handle unaligned loads get the bytes used to dereference global memory and extract
// those bytes from the loaded u32.
if (IsUnaligned(type)) {
Node mask = Immediate(GetUnalignedMask(type));
Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte),
Immediate(8));
gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem),
std::move(offset), Immediate(size));
}
SetTemporary(bb, i, gmem);
@@ -295,19 +303,32 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
}();
// For unaligned reads we have to read memory too.
const bool is_read = IsUnaligned(type);
const auto [real_address_base, base_address, descriptor] =
TrackGlobalMemory(bb, instr, true);
TrackGlobalMemory(bb, instr, is_read, true);
if (!real_address_base || !base_address) {
// Tracking failed, skip the store.
break;
}
const u32 count = GetStgMemorySize(type);
const u32 size = GetMemorySize(type);
const u32 count = Common::AlignUp(size, 32) / 32;
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
const Node value = GetRegister(instr.gpr0.Value() + i);
Node value = GetRegister(instr.gpr0.Value() + i);
if (IsUnaligned(type)) {
Node mask = Immediate(GetUnalignedMask(type));
Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset,
Immediate(size));
}
bb.push_back(Operation(OperationCode::Assign, gmem, value));
}
break;
@@ -336,7 +357,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
Instruction instr,
bool is_write) {
bool is_read, bool is_write) {
const auto addr_register{GetRegister(instr.gmem.gpr)};
const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
@@ -351,11 +372,8 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock&
const GlobalMemoryBase descriptor{index, offset};
const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
auto& usage = entry->second;
if (is_write) {
usage.is_written = true;
} else {
usage.is_read = true;
}
usage.is_written |= is_write;
usage.is_read |= is_read;
const auto real_address =
Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);


@@ -794,14 +794,10 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
bool is_tld4) {
const auto [coord_offsets, size, wrap_value,
diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> {
if (is_tld4) {
return {{0, 8, 16}, 6, 32, 64};
} else {
return {{0, 4, 8}, 4, 8, 16};
}
}();
const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U};
const u32 size = is_tld4 ? 6 : 4;
const s32 wrap_value = is_tld4 ? 32 : 8;
const s32 diff_value = is_tld4 ? 64 : 16;
const u32 mask = (1U << size) - 1;
std::vector<Node> aoffi;
@@ -814,7 +810,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor
LOG_WARNING(HW_GPU,
"AOFFI constant folding failed, some hardware might have graphical issues");
for (std::size_t coord = 0; coord < coord_count; ++coord) {
const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size);
const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size);
const Node condition =
Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
@@ -824,7 +820,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor
}
for (std::size_t coord = 0; coord < coord_count; ++coord) {
s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask;
s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask;
if (value >= wrap_value) {
value -= diff_value;
}


@@ -392,8 +392,30 @@ struct MetaImage {
using Meta =
std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>;
class AmendNode {
public:
std::optional<std::size_t> GetAmendIndex() const {
if (amend_index == amend_null_index) {
return std::nullopt;
}
return {amend_index};
}
void SetAmendIndex(std::size_t index) {
amend_index = index;
}
void ClearAmend() {
amend_index = amend_null_index;
}
private:
static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL;
std::size_t amend_index{amend_null_index};
};
/// Holds any kind of operation that can be done in the IR
class OperationNode final {
class OperationNode final : public AmendNode {
public:
explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {}
@@ -433,7 +455,7 @@ private:
};
/// Encloses inside any kind of node that returns a boolean conditionally-executed code
class ConditionalNode final {
class ConditionalNode final : public AmendNode {
public:
explicit ConditionalNode(Node condition, std::vector<Node>&& code)
: condition{std::move(condition)}, code{std::move(code)} {}


@@ -446,4 +446,10 @@ Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) {
Immediate(bits));
}
std::size_t ShaderIR::DeclareAmend(Node new_amend) {
const std::size_t id = amend_code.size();
amend_code.push_back(new_amend);
return id;
}
} // namespace VideoCommon::Shader


@@ -176,6 +176,10 @@ public:
/// Returns a condition code evaluated from internal flags
Node GetConditionCode(Tegra::Shader::ConditionCode cc) const;
const Node& GetAmendNode(std::size_t index) const {
return amend_code[index];
}
private:
friend class ASTDecoder;
@@ -390,7 +394,10 @@ private:
std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
Tegra::Shader::Instruction instr,
bool is_write);
bool is_read, bool is_write);
/// Register new amending code and obtain the reference id.
std::size_t DeclareAmend(Node new_amend);
const ProgramCode& program_code;
const u32 main_offset;
@@ -406,6 +413,7 @@ private:
std::map<u32, NodeBlock> basic_blocks;
NodeBlock global_code;
ASTManager program_manager{true, true};
std::vector<Node> amend_code;
std::set<u32> used_registers;
std::set<Tegra::Shader::Pred> used_predicates;


@@ -215,18 +215,11 @@ void GRenderWindow::moveContext() {
}
void GRenderWindow::SwapBuffers() {
// In our multi-threaded QWidget use case we shouldn't need to call `makeCurrent`,
// since we never call `doneCurrent` in this thread.
// However:
// - The Qt debug runtime prints a bogus warning on the console if `makeCurrent` wasn't called
// since the last time `swapBuffers` was executed;
// - On macOS, if `makeCurrent` isn't called explicitly, resizing the buffer breaks.
context->makeCurrent(child);
context->swapBuffers(child);
if (!first_frame) {
emit FirstFrameDisplayed();
first_frame = true;
emit FirstFrameDisplayed();
}
}


@@ -48,6 +48,7 @@ void ConfigureHotkeys::Populate(const HotkeyRegistry& registry) {
}
ui->hotkey_list->expandAll();
ui->hotkey_list->resizeColumnToContents(0);
}
void ConfigureHotkeys::changeEvent(QEvent* event) {