vk_stream_buffer: Remove copy code path

vk_stream_buffer: Implement a stream buffer
This manages two kinds of streaming buffers: one for unified memory models and one for dedicated GPUs. The first one skips the copy from the staging buffer to the real buffer, since it creates a unified buffer. This implementation waits for all fences to finish their operations before "invalidating". This is suboptimal: it should instead allocate another buffer or start searching from the beginning. There is room for improvement here. This could also handle AMD's "pinned" memory (a 256 MiB heap), which seems to be designed for buffer streaming.
2019-02-26 02:09:43 -03:00 · 2019-02-24 04:27:51 -03:00 · 2019-02-24 04:19:04 -03:00
61 changed files with 444 additions and 533 deletions
--- a/externals/cubeb
+++ b/externals/cubeb
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -46,18 +46,16 @@ struct AudioRendererParameter {
    u32_le sample_rate;
    u32_le sample_count;
    u32_le mix_buffer_count;
-    u32_le submix_count;
+    u32_le unknown_c;
    u32_le voice_count;
    u32_le sink_count;
    u32_le effect_count;
-    u32_le performance_frame_count;
-    u8 is_voice_drop_enabled;
-    u8 unknown_21;
-    u8 unknown_22;
-    u8 execution_mode;
+    u32_le unknown_1c;
+    u8 unknown_20;
+    INSERT_PADDING_BYTES(3);
    u32_le splitter_count;
-    u32_le num_splitter_send_channels;
-    u32_le unknown_30;
+    u32_le unknown_2c;
+    INSERT_PADDING_WORDS(1);
    u32_le revision;
 };
 static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size");
--- a/src/audio_core/codec.cpp
+++ b/src/audio_core/codec.cpp
@@ -68,8 +68,8 @@ std::vector<s16> DecodeADPCM(const u8* const data, std::size_t size, const ADPCM
        }
    }

-    state.yn1 = static_cast<s16>(yn1);
-    state.yn2 = static_cast<s16>(yn2);
+    state.yn1 = yn1;
+    state.yn2 = yn2;

    return ret;
 }
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -12,10 +12,6 @@
 #include "common/ring_buffer.h"
 #include "core/settings.h"

-#ifdef _MSC_VER
-#include <objbase.h>
-#endif
-
 namespace AudioCore {

 class CubebSinkStream final : public SinkStream {
@@ -50,7 +46,7 @@ public:
        }
    }

-    ~CubebSinkStream() override {
+    ~CubebSinkStream() {
        if (!ctx) {
            return;
        }
@@ -79,11 +75,11 @@ public:
        queue.Push(samples);
    }

-    std::size_t SamplesInQueue(u32 channel_count) const override {
+    std::size_t SamplesInQueue(u32 num_channels) const override {
        if (!ctx)
            return 0;

-        return queue.Size() / channel_count;
+        return queue.Size() / num_channels;
    }

    void Flush() override {
@@ -102,7 +98,7 @@ private:
    u32 num_channels{};

    Common::RingBuffer<s16, 0x10000> queue;
-    std::array<s16, 2> last_frame{};
+    std::array<s16, 2> last_frame;
    std::atomic<bool> should_flush{};
    TimeStretcher time_stretch;

@@ -112,11 +108,6 @@ private:
 };

 CubebSink::CubebSink(std::string_view target_device_name) {
-    // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
-#ifdef _MSC_VER
-    com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
-#endif
-
    if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) {
        LOG_CRITICAL(Audio_Sink, "cubeb_init failed");
        return;
@@ -151,12 +142,6 @@ CubebSink::~CubebSink() {
    }

    cubeb_destroy(ctx);
-
-#ifdef _MSC_VER
-    if (SUCCEEDED(com_init_result)) {
-        CoUninitialize();
-    }
-#endif
 }

 SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,
--- a/src/audio_core/cubeb_sink.h
+++ b/src/audio_core/cubeb_sink.h
@@ -25,10 +25,6 @@ private:
    cubeb* ctx{};
    cubeb_devid output_device{};
    std::vector<SinkStreamPtr> sink_streams;
-
-#ifdef _MSC_VER
-    u32 com_init_result = 0;
-#endif
 };

 std::vector<std::string> ListCubebSinkDevices();
--- a/src/common/color.h
+++ b/src/common/color.h
@@ -55,36 +55,36 @@ constexpr u8 Convert8To6(u8 value) {
 /**
 * Decode a color stored in RGBA8 format
 * @param bytes Pointer to encoded source color
- * @return Result color decoded as Common::Vec4<u8>
+ * @return Result color decoded as Math::Vec4<u8>
 */
-inline Common::Vec4<u8> DecodeRGBA8(const u8* bytes) {
+inline Math::Vec4<u8> DecodeRGBA8(const u8* bytes) {
    return {bytes[3], bytes[2], bytes[1], bytes[0]};
 }

 /**
 * Decode a color stored in RGB8 format
 * @param bytes Pointer to encoded source color
- * @return Result color decoded as Common::Vec4<u8>
+ * @return Result color decoded as Math::Vec4<u8>
 */
-inline Common::Vec4<u8> DecodeRGB8(const u8* bytes) {
+inline Math::Vec4<u8> DecodeRGB8(const u8* bytes) {
    return {bytes[2], bytes[1], bytes[0], 255};
 }

 /**
 * Decode a color stored in RG8 (aka HILO8) format
 * @param bytes Pointer to encoded source color
- * @return Result color decoded as Common::Vec4<u8>
+ * @return Result color decoded as Math::Vec4<u8>
 */
-inline Common::Vec4<u8> DecodeRG8(const u8* bytes) {
+inline Math::Vec4<u8> DecodeRG8(const u8* bytes) {
    return {bytes[1], bytes[0], 0, 255};
 }

 /**
 * Decode a color stored in RGB565 format
 * @param bytes Pointer to encoded source color
- * @return Result color decoded as Common::Vec4<u8>
+ * @return Result color decoded as Math::Vec4<u8>
 */
-inline Common::Vec4<u8> DecodeRGB565(const u8* bytes) {
+inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
    u16_le pixel;
    std::memcpy(&pixel, bytes, sizeof(pixel));
    return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F),
@@ -94,9 +94,9 @@ inline Common::Vec4<u8> DecodeRGB565(const u8* bytes) {
 /**
 * Decode a color stored in RGB5A1 format
 * @param bytes Pointer to encoded source color
- * @return Result color decoded as Common::Vec4<u8>
+ * @return Result color decoded as Math::Vec4<u8>
 */
-inline Common::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
+inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
    u16_le pixel;
    std::memcpy(&pixel, bytes, sizeof(pixel));
    return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F),
@@ -106,9 +106,9 @@ inline Common::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
 /**
 * Decode a color stored in RGBA4 format
 * @param bytes Pointer to encoded source color
- * @return Result color decoded as Common::Vec4<u8>
+ * @return Result color decoded as Math::Vec4<u8>
 */
-inline Common::Vec4<u8> DecodeRGBA4(const u8* bytes) {
+inline Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
    u16_le pixel;
    std::memcpy(&pixel, bytes, sizeof(pixel));
    return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF),
@@ -138,9 +138,9 @@ inline u32 DecodeD24(const u8* bytes) {
 /**
 * Decode a depth value and a stencil value stored in D24S8 format
 * @param bytes Pointer to encoded source values
- * @return Resulting values stored as a Common::Vec2
+ * @return Resulting values stored as a Math::Vec2
 */
-inline Common::Vec2<u32> DecodeD24S8(const u8* bytes) {
+inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
    return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]};
 }

@@ -149,7 +149,7 @@ inline Common::Vec2<u32> DecodeD24S8(const u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRGBA8(const Common::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
    bytes[3] = color.r();
    bytes[2] = color.g();
    bytes[1] = color.b();
@@ -161,7 +161,7 @@ inline void EncodeRGBA8(const Common::Vec4<u8>& color, u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRGB8(const Common::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
    bytes[2] = color.r();
    bytes[1] = color.g();
    bytes[0] = color.b();
@@ -172,7 +172,7 @@ inline void EncodeRGB8(const Common::Vec4<u8>& color, u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
    bytes[1] = color.r();
    bytes[0] = color.g();
 }
@@ -181,7 +181,7 @@ inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRGB565(const Common::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
    const u16_le data =
        (Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b());

@@ -193,7 +193,7 @@ inline void EncodeRGB565(const Common::Vec4<u8>& color, u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRGB5A1(const Common::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
    const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) |
                        (Convert8To5(color.b()) << 1) | Convert8To1(color.a());

@@ -205,7 +205,7 @@ inline void EncodeRGB5A1(const Common::Vec4<u8>& color, u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRGBA4(const Common::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) {
    const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) |
                     (Convert8To4(color.b()) << 4) | Convert8To4(color.a());

--- a/src/common/math_util.h
+++ b/src/common/math_util.h
@@ -7,7 +7,7 @@
 #include <cstdlib>
 #include <type_traits>

-namespace Common {
+namespace MathUtil {

 constexpr float PI = 3.14159265f;

@@ -41,4 +41,4 @@ struct Rectangle {
    }
 };

-} // namespace Common
+} // namespace MathUtil
--- a/src/common/quaternion.h
+++ b/src/common/quaternion.h
@@ -6,12 +6,12 @@

 #include "common/vector_math.h"

-namespace Common {
+namespace Math {

 template <typename T>
 class Quaternion {
 public:
-    Vec3<T> xyz;
+    Math::Vec3<T> xyz;
    T w{};

    Quaternion<decltype(-T{})> Inverse() const {
@@ -38,12 +38,12 @@ public:
 };

 template <typename T>
-auto QuaternionRotate(const Quaternion<T>& q, const Vec3<T>& v) {
+auto QuaternionRotate(const Quaternion<T>& q, const Math::Vec3<T>& v) {
    return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w);
 }

-inline Quaternion<float> MakeQuaternion(const Vec3<float>& axis, float angle) {
+inline Quaternion<float> MakeQuaternion(const Math::Vec3<float>& axis, float angle) {
    return {axis * std::sin(angle / 2), std::cos(angle / 2)};
 }

-} // namespace Common
+} // namespace Math
--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -28,8 +28,8 @@
 #include <cstring>
 #include "common/common_types.h"

-// GCC
-#ifdef __GNUC__
+// GCC 4.6+
+#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)

 #if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN)
 #define COMMON_LITTLE_ENDIAN 1
@@ -38,7 +38,7 @@
 #endif

 // LLVM/clang
-#elif defined(__clang__)
+#elif __clang__

 #if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN)
 #define COMMON_LITTLE_ENDIAN 1
--- a/src/common/vector_math.h
+++ b/src/common/vector_math.h
@@ -33,7 +33,7 @@
 #include <cmath>
 #include <type_traits>

-namespace Common {
+namespace Math {

 template <typename T>
 class Vec2;
@@ -690,4 +690,4 @@ constexpr Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw) {
    return MakeVec(x, yzw[0], yzw[1], yzw[2]);
 }

-} // namespace Common
+} // namespace Math
--- a/src/core/file_sys/vfs_vector.cpp
+++ b/src/core/file_sys/vfs_vector.cpp
@@ -47,7 +47,7 @@ std::size_t VectorVfsFile::Write(const u8* data_, std::size_t length, std::size_
    if (offset + length > data.size())
        data.resize(offset + length);
    const auto write = std::min(length, data.size() - offset);
-    std::memcpy(data.data() + offset, data_, write);
+    std::memcpy(data.data(), data_, write);
    return write;
 }

--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -67,7 +67,7 @@ static bool IsWithinTouchscreen(const Layout::FramebufferLayout& layout, unsigne
            framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right);
 }

-std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) const {
+std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) {
    new_x = std::max(new_x, framebuffer_layout.screen.left);
    new_x = std::min(new_x, framebuffer_layout.screen.right - 1);

--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -166,7 +166,7 @@ private:
    /**
     * Clip the provided coordinates to be inside the touchscreen area.
     */
-    std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y) const;
+    std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y);
 };

 } // namespace Core::Frontend
--- a/src/core/frontend/framebuffer_layout.cpp
+++ b/src/core/frontend/framebuffer_layout.cpp
@@ -12,12 +12,12 @@ namespace Layout {

 // Finds the largest size subrectangle contained in window area that is confined to the aspect ratio
 template <class T>
-static Common::Rectangle<T> MaxRectangle(Common::Rectangle<T> window_area,
-                                         float screen_aspect_ratio) {
+static MathUtil::Rectangle<T> maxRectangle(MathUtil::Rectangle<T> window_area,
+                                           float screen_aspect_ratio) {
    float scale = std::min(static_cast<float>(window_area.GetWidth()),
                           window_area.GetHeight() / screen_aspect_ratio);
-    return Common::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
-                                static_cast<T>(std::round(scale * screen_aspect_ratio))};
+    return MathUtil::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
+                                  static_cast<T>(std::round(scale * screen_aspect_ratio))};
 }

 FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
@@ -29,8 +29,8 @@ FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {

    const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) /
                                       ScreenUndocked::Width};
-    Common::Rectangle<unsigned> screen_window_area{0, 0, width, height};
-    Common::Rectangle<unsigned> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio);
+    MathUtil::Rectangle<unsigned> screen_window_area{0, 0, width, height};
+    MathUtil::Rectangle<unsigned> screen = maxRectangle(screen_window_area, emulation_aspect_ratio);

    float window_aspect_ratio = static_cast<float>(height) / width;

--- a/src/core/frontend/framebuffer_layout.h
+++ b/src/core/frontend/framebuffer_layout.h
@@ -16,7 +16,7 @@ struct FramebufferLayout {
    unsigned width{ScreenUndocked::Width};
    unsigned height{ScreenUndocked::Height};

-    Common::Rectangle<unsigned> screen;
+    MathUtil::Rectangle<unsigned> screen;

    /**
     * Returns the ration of pixel size of the screen, compared to the native size of the undocked
--- a/src/core/frontend/input.h
+++ b/src/core/frontend/input.h
@@ -124,7 +124,7 @@ using AnalogDevice = InputDevice<std::tuple<float, float>>;
 *   Orientation is determined by right-hand rule.
 *   Units: deg/sec
 */
-using MotionDevice = InputDevice<std::tuple<Common::Vec3<float>, Common::Vec3<float>>>;
+using MotionDevice = InputDevice<std::tuple<Math::Vec3<float>, Math::Vec3<float>>>;

 /**
 * A touch device is an input device that returns a tuple of two floats and a bool. The floats are
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -14,7 +14,6 @@ constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
 constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
 constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
 constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
-constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
 constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
 constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
 constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -14,47 +14,32 @@
 namespace Kernel {
 namespace {
 constexpr u16 GetSlot(Handle handle) {
-    return static_cast<u16>(handle >> 15);
+    return handle >> 15;
 }

 constexpr u16 GetGeneration(Handle handle) {
-    return static_cast<u16>(handle & 0x7FFF);
+    return handle & 0x7FFF;
 }
 } // Anonymous namespace

 HandleTable::HandleTable() {
+    next_generation = 1;
    Clear();
 }

 HandleTable::~HandleTable() = default;

-ResultCode HandleTable::SetSize(s32 handle_table_size) {
-    if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
-        return ERR_OUT_OF_MEMORY;
-    }
-
-    // Values less than or equal to zero indicate to use the maximum allowable
-    // size for the handle table in the actual kernel, so we ignore the given
-    // value in that case, since we assume this by default unless this function
-    // is called.
-    if (handle_table_size > 0) {
-        table_size = static_cast<u16>(handle_table_size);
-    }
-
-    return RESULT_SUCCESS;
-}
-
 ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) {
    DEBUG_ASSERT(obj != nullptr);

-    const u16 slot = next_free_slot;
-    if (slot >= table_size) {
+    u16 slot = next_free_slot;
+    if (slot >= generations.size()) {
        LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use.");
        return ERR_HANDLE_TABLE_FULL;
    }
    next_free_slot = generations[slot];

-    const u16 generation = next_generation++;
+    u16 generation = next_generation++;

    // Overflow count so it fits in the 15 bits dedicated to the generation in the handle.
    // Horizon OS uses zero to represent an invalid handle, so skip to 1.
@@ -79,11 +64,10 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
 }

 ResultCode HandleTable::Close(Handle handle) {
-    if (!IsValid(handle)) {
+    if (!IsValid(handle))
        return ERR_INVALID_HANDLE;
-    }

-    const u16 slot = GetSlot(handle);
+    u16 slot = GetSlot(handle);

    objects[slot] = nullptr;

@@ -93,10 +77,10 @@ ResultCode HandleTable::Close(Handle handle) {
 }

 bool HandleTable::IsValid(Handle handle) const {
-    const std::size_t slot = GetSlot(handle);
-    const u16 generation = GetGeneration(handle);
+    std::size_t slot = GetSlot(handle);
+    u16 generation = GetGeneration(handle);

-    return slot < table_size && objects[slot] != nullptr && generations[slot] == generation;
+    return slot < MAX_COUNT && objects[slot] != nullptr && generations[slot] == generation;
 }

 SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
@@ -113,7 +97,7 @@ SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
 }

 void HandleTable::Clear() {
-    for (u16 i = 0; i < table_size; ++i) {
+    for (u16 i = 0; i < MAX_COUNT; ++i) {
        generations[i] = i + 1;
        objects[i] = nullptr;
    }
--- a/src/core/hle/kernel/handle_table.h
+++ b/src/core/hle/kernel/handle_table.h
@@ -49,20 +49,6 @@ public:
    HandleTable();
    ~HandleTable();

-    /**
-     * Sets the number of handles that may be in use at one time
-     * for this handle table.
-     *
-     * @param handle_table_size The desired size to limit the handle table to.
-     *
-     * @returns an error code indicating if initialization was successful.
-     *          If initialization was not successful, then ERR_OUT_OF_MEMORY
-     *          will be returned.
-     *
-     * @pre handle_table_size must be within the range [0, 1024]
-     */
-    ResultCode SetSize(s32 handle_table_size);
-
    /**
     * Allocates a handle for the given object.
     * @return The created Handle or one of the following errors:
@@ -117,21 +103,14 @@ private:
     */
    std::array<u16, MAX_COUNT> generations;

-    /**
-     * The limited size of the handle table. This can be specified by process
-     * capabilities in order to restrict the overall number of handles that
-     * can be created in a process instance
-     */
-    u16 table_size = static_cast<u16>(MAX_COUNT);
-
    /**
     * Global counter of the number of created handles. Stored in `generations` when a handle is
     * created, and wraps around to 1 when it hits 0x8000.
     */
-    u16 next_generation = 1;
+    u16 next_generation;

    /// Head of the free slots linked list.
-    u16 next_free_slot = 0;
+    u16 next_free_slot;
 };

 } // namespace Kernel
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -99,13 +99,7 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
    vm_manager.Reset(metadata.GetAddressSpaceType());

    const auto& caps = metadata.GetKernelCapabilities();
-    const auto capability_init_result =
-        capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
-    if (capability_init_result.IsError()) {
-        return capability_init_result;
-    }
-
-    return handle_table.SetSize(capabilities.GetHandleTableSize());
+    return capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
 }

 void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) {
--- a/src/core/hle/kernel/process_capability.cpp
+++ b/src/core/hle/kernel/process_capability.cpp
@@ -96,7 +96,7 @@ void ProcessCapabilities::InitializeForMetadatalessProcess() {
    interrupt_capabilities.set();

    // Allow using the maximum possible amount of handles
-    handle_table_size = static_cast<s32>(HandleTable::MAX_COUNT);
+    handle_table_size = static_cast<u32>(HandleTable::MAX_COUNT);

    // Allow all debugging capabilities.
    is_debuggable = true;
@@ -337,7 +337,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
        return ERR_RESERVED_VALUE;
    }

-    handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF);
+    handle_table_size = (flags >> 16) & 0x3FF;
    return RESULT_SUCCESS;
 }

--- a/src/core/hle/kernel/process_capability.h
+++ b/src/core/hle/kernel/process_capability.h
@@ -156,7 +156,7 @@ public:
    }

    /// Gets the number of total allowable handles for the process' handle table.
-    s32 GetHandleTableSize() const {
+    u32 GetHandleTableSize() const {
        return handle_table_size;
    }

@@ -252,7 +252,7 @@ private:
    u64 core_mask = 0;
    u64 priority_mask = 0;

-    s32 handle_table_size = 0;
+    u32 handle_table_size = 0;
    u32 kernel_version = 0;

    ProgramType program_type = ProgramType::SysModule;
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -262,20 +262,20 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
    LOG_DEBUG(Service_Audio, "called");

    u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
-    buffer_sz += params.submix_count * 1024;
-    buffer_sz += 0x940 * (params.submix_count + 1);
+    buffer_sz += params.unknown_c * 1024;
+    buffer_sz += 0x940 * (params.unknown_c + 1);
    buffer_sz += 0x3F0 * params.voice_count;
-    buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10);
+    buffer_sz += Common::AlignUp(8 * (params.unknown_c + 1), 0x10);
    buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
-    buffer_sz += Common::AlignUp(
-        (0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) *
-            (params.mix_buffer_count + 6),
-        0x40);
+    buffer_sz +=
+        Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) *
+                            (params.mix_buffer_count + 6),
+                        0x40);

    if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
-        const u32 count = params.submix_count + 1;
+        u32 count = params.unknown_c + 1;
        u64 node_count = Common::AlignUp(count, 0x40);
-        const u64 node_state_buffer_sz =
+        u64 node_state_buffer_sz =
            4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
        u64 edge_matrix_buffer_sz = 0;
        node_count = Common::AlignUp(count * count, 0x40);
@@ -289,19 +289,19 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {

    buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
    if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
-        buffer_sz += 0xE0 * params.num_splitter_send_channels;
+        buffer_sz += 0xE0 * params.unknown_2c;
        buffer_sz += 0x20 * params.splitter_count;
-        buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10);
+        buffer_sz += Common::AlignUp(4 * params.unknown_2c, 0x10);
    }
    buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
    u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
                    ((params.voice_count * 256) | 0x40);

-    if (params.performance_frame_count >= 1) {
+    if (params.unknown_1c >= 1) {
        output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
                                      16 * params.voice_count + 16) +
                                     0x658) *
-                                            (params.performance_frame_count + 1) +
+                                            (params.unknown_1c + 1) +
                                        0xc0,
                                    0x40) +
                    output_sz;
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -23,7 +23,7 @@ u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector

 void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
                        u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
-                        const Common::Rectangle<int>& crop_rect) {
+                        const MathUtil::Rectangle<int>& crop_rect) {
    VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
    LOG_TRACE(Service,
              "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -25,7 +25,7 @@ public:
    /// Performs a screen flip, drawing the buffer pointed to by the handle.
    void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
              NVFlinger::BufferQueue::BufferTransformFlags transform,
-              const Common::Rectangle<int>& crop_rect);
+              const MathUtil::Rectangle<int>& crop_rect);

 private:
    std::shared_ptr<nvmap> nvmap_dev;
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -63,7 +63,7 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
 }

 void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
-                              const Common::Rectangle<int>& crop_rect) {
+                              const MathUtil::Rectangle<int>& crop_rect) {
    auto itr = std::find_if(queue.begin(), queue.end(),
                            [&](const Buffer& buffer) { return buffer.slot == slot; });
    ASSERT(itr != queue.end());
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -67,14 +67,14 @@ public:
        Status status = Status::Free;
        IGBPBuffer igbp_buffer;
        BufferTransformFlags transform;
-        Common::Rectangle<int> crop_rect;
+        MathUtil::Rectangle<int> crop_rect;
    };

    void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer);
    std::optional<u32> DequeueBuffer(u32 width, u32 height);
    const IGBPBuffer& RequestBuffer(u32 slot) const;
    void QueueBuffer(u32 slot, BufferTransformFlags transform,
-                     const Common::Rectangle<int>& crop_rect);
+                     const MathUtil::Rectangle<int>& crop_rect);
    std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
    void ReleaseBuffer(u32 slot);
    u32 Query(QueryType type);
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -28,13 +28,9 @@ namespace Service::NVFlinger {
 constexpr std::size_t SCREEN_REFRESH_RATE = 60;
 constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);

-NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
-    displays.emplace_back(0, "Default");
-    displays.emplace_back(1, "External");
-    displays.emplace_back(2, "Edid");
-    displays.emplace_back(3, "Internal");
-    displays.emplace_back(4, "Null");
-
+NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing)
+    : displays{{0, "Default"}, {1, "External"}, {2, "Edid"}, {3, "Internal"}, {4, "Null"}},
+      core_timing{core_timing} {
    // Schedule the screen composition events
    composition_event =
        core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
@@ -59,14 +55,13 @@ std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
    // TODO(Subv): Currently we only support the Default display.
    ASSERT(name == "Default");

-    const auto itr =
-        std::find_if(displays.begin(), displays.end(),
-                     [&](const VI::Display& display) { return display.GetName() == name; });
+    const auto itr = std::find_if(displays.begin(), displays.end(),
+                                  [&](const VI::Display& display) { return display.name == name; });
    if (itr == displays.end()) {
        return {};
    }

-    return itr->GetID();
+    return itr->id;
 }

 std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
@@ -76,10 +71,13 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
        return {};
    }

+    ASSERT_MSG(display->layers.empty(), "Only one layer is supported per display at the moment");
+
    const u64 layer_id = next_layer_id++;
    const u32 buffer_queue_id = next_buffer_queue_id++;
-    buffer_queues.emplace_back(buffer_queue_id, layer_id);
-    display->CreateLayer(layer_id, buffer_queues.back());
+    auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id);
+    display->layers.emplace_back(layer_id, buffer_queue);
+    buffer_queues.emplace_back(std::move(buffer_queue));
    return layer_id;
 }

@@ -90,7 +88,7 @@ std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) co
        return {};
    }

-    return layer->GetBufferQueue().GetId();
+    return layer->buffer_queue->GetId();
 }

 Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const {
@@ -100,20 +98,12 @@ Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_i
        return nullptr;
    }

-    return display->GetVSyncEvent();
+    return display->vsync_event.readable;
 }

-BufferQueue& NVFlinger::FindBufferQueue(u32 id) {
+std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const {
    const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
-                                  [id](const auto& queue) { return queue.GetId() == id; });
-
-    ASSERT(itr != buffer_queues.end());
-    return *itr;
-}
-
-const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const {
-    const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
-                                  [id](const auto& queue) { return queue.GetId() == id; });
+                                  [&](const auto& queue) { return queue->GetId() == id; });

    ASSERT(itr != buffer_queues.end());
    return *itr;
@@ -122,7 +112,7 @@ const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const {
 VI::Display* NVFlinger::FindDisplay(u64 display_id) {
    const auto itr =
        std::find_if(displays.begin(), displays.end(),
-                     [&](const VI::Display& display) { return display.GetID() == display_id; });
+                     [&](const VI::Display& display) { return display.id == display_id; });

    if (itr == displays.end()) {
        return nullptr;
@@ -134,7 +124,7 @@ VI::Display* NVFlinger::FindDisplay(u64 display_id) {
 const VI::Display* NVFlinger::FindDisplay(u64 display_id) const {
    const auto itr =
        std::find_if(displays.begin(), displays.end(),
-                     [&](const VI::Display& display) { return display.GetID() == display_id; });
+                     [&](const VI::Display& display) { return display.id == display_id; });

    if (itr == displays.end()) {
        return nullptr;
@@ -150,7 +140,14 @@ VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
        return nullptr;
    }

-    return display->FindLayer(layer_id);
+    const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
+                                  [&](const VI::Layer& layer) { return layer.id == layer_id; });
+
+    if (itr == display->layers.end()) {
+        return nullptr;
+    }
+
+    return &*itr;
 }

 const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
@@ -160,24 +157,33 @@ const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
        return nullptr;
    }

-    return display->FindLayer(layer_id);
+    const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
+                                  [&](const VI::Layer& layer) { return layer.id == layer_id; });
+
+    if (itr == display->layers.end()) {
+        return nullptr;
+    }
+
+    return &*itr;
 }

 void NVFlinger::Compose() {
    for (auto& display : displays) {
        // Trigger vsync for this display at the end of drawing
-        SCOPE_EXIT({ display.SignalVSyncEvent(); });
+        SCOPE_EXIT({ display.vsync_event.writable->Signal(); });

        // Don't do anything for displays without layers.
-        if (!display.HasLayers())
+        if (display.layers.empty())
            continue;

        // TODO(Subv): Support more than 1 layer.
-        VI::Layer& layer = display.GetLayer(0);
-        auto& buffer_queue = layer.GetBufferQueue();
+        ASSERT_MSG(display.layers.size() == 1, "Max 1 layer per display is supported");
+
+        VI::Layer& layer = display.layers[0];
+        auto& buffer_queue = layer.buffer_queue;

        // Search for a queued buffer and acquire it
-        auto buffer = buffer_queue.AcquireBuffer();
+        auto buffer = buffer_queue->AcquireBuffer();

        MicroProfileFlip();

@@ -202,7 +208,7 @@ void NVFlinger::Compose() {
                     igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
                     buffer->get().transform, buffer->get().crop_rect);

-        buffer_queue.ReleaseBuffer(buffer->get().slot);
+        buffer_queue->ReleaseBuffer(buffer->get().slot);
    }
 }

--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -28,8 +28,8 @@ class Module;
 } // namespace Service::Nvidia

 namespace Service::VI {
-class Display;
-class Layer;
+struct Display;
+struct Layer;
 } // namespace Service::VI

 namespace Service::NVFlinger {
@@ -65,10 +65,7 @@ public:
    Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const;

    /// Obtains a buffer queue identified by the ID.
-    BufferQueue& FindBufferQueue(u32 id);
-
-    /// Obtains a buffer queue identified by the ID.
-    const BufferQueue& FindBufferQueue(u32 id) const;
+    std::shared_ptr<BufferQueue> FindBufferQueue(u32 id) const;

    /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
    /// finished.
@@ -90,7 +87,7 @@ private:
    std::shared_ptr<Nvidia::Module> nvdrv;

    std::vector<VI::Display> displays;
-    std::vector<BufferQueue> buffer_queues;
+    std::vector<std::shared_ptr<BufferQueue>> buffer_queues;

    /// Id to use for the next layer that is created, this counter is shared among all displays.
    u64 next_layer_id = 1;
--- a/src/core/hle/service/vi/display/vi_display.cpp
+++ b/src/core/hle/service/vi/display/vi_display.cpp
@@ -2,12 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include <algorithm>
-#include <utility>
-
 #include <fmt/format.h>

-#include "common/assert.h"
 #include "core/core.h"
 #include "core/hle/kernel/readable_event.h"
 #include "core/hle/service/vi/display/vi_display.h"
@@ -23,49 +19,4 @@ Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} {

 Display::~Display() = default;

-Layer& Display::GetLayer(std::size_t index) {
-    return layers.at(index);
-}
-
-const Layer& Display::GetLayer(std::size_t index) const {
-    return layers.at(index);
-}
-
-Kernel::SharedPtr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
-    return vsync_event.readable;
-}
-
-void Display::SignalVSyncEvent() {
-    vsync_event.writable->Signal();
-}
-
-void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
-    // TODO(Subv): Support more than 1 layer.
-    ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
-
-    layers.emplace_back(id, buffer_queue);
-}
-
-Layer* Display::FindLayer(u64 id) {
-    const auto itr = std::find_if(layers.begin(), layers.end(),
-                                  [id](const VI::Layer& layer) { return layer.GetID() == id; });
-
-    if (itr == layers.end()) {
-        return nullptr;
-    }
-
-    return &*itr;
-}
-
-const Layer* Display::FindLayer(u64 id) const {
-    const auto itr = std::find_if(layers.begin(), layers.end(),
-                                  [id](const VI::Layer& layer) { return layer.GetID() == id; });
-
-    if (itr == layers.end()) {
-        return nullptr;
-    }
-
-    return &*itr;
-}
-
 } // namespace Service::VI
--- a/src/core/hle/service/vi/display/vi_display.h
+++ b/src/core/hle/service/vi/display/vi_display.h
@@ -10,84 +10,14 @@
 #include "common/common_types.h"
 #include "core/hle/kernel/writable_event.h"

-namespace Service::NVFlinger {
-class BufferQueue;
-}
-
 namespace Service::VI {

-class Layer;
+struct Layer;

-/// Represents a single display type
-class Display {
-public:
-    /// Constructs a display with a given unique ID and name.
-    ///
-    /// @param id   The unique ID for this display.
-    /// @param name The name for this display.
-    ///
+struct Display {
    Display(u64 id, std::string name);
    ~Display();

-    Display(const Display&) = delete;
-    Display& operator=(const Display&) = delete;
-
-    Display(Display&&) = default;
-    Display& operator=(Display&&) = default;
-
-    /// Gets the unique ID assigned to this display.
-    u64 GetID() const {
-        return id;
-    }
-
-    /// Gets the name of this display
-    const std::string& GetName() const {
-        return name;
-    }
-
-    /// Whether or not this display has any layers added to it.
-    bool HasLayers() const {
-        return !layers.empty();
-    }
-
-    /// Gets a layer for this display based off an index.
-    Layer& GetLayer(std::size_t index);
-
-    /// Gets a layer for this display based off an index.
-    const Layer& GetLayer(std::size_t index) const;
-
-    /// Gets the readable vsync event.
-    Kernel::SharedPtr<Kernel::ReadableEvent> GetVSyncEvent() const;
-
-    /// Signals the internal vsync event.
-    void SignalVSyncEvent();
-
-    /// Creates and adds a layer to this display with the given ID.
-    ///
-    /// @param id           The ID to assign to the created layer.
-    /// @param buffer_queue The buffer queue for the layer instance to use.
-    ///
-    void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);
-
-    /// Attempts to find a layer with the given ID.
-    ///
-    /// @param id The layer ID.
-    ///
-    /// @returns If found, the Layer instance with the given ID.
-    ///          If not found, then nullptr is returned.
-    ///
-    Layer* FindLayer(u64 id);
-
-    /// Attempts to find a layer with the given ID.
-    ///
-    /// @param id The layer ID.
-    ///
-    /// @returns If found, the Layer instance with the given ID.
-    ///          If not found, then nullptr is returned.
-    ///
-    const Layer* FindLayer(u64 id) const;
-
-private:
    u64 id;
    std::string name;

--- a/src/core/hle/service/vi/layer/vi_layer.cpp
+++ b/src/core/hle/service/vi/layer/vi_layer.cpp
@@ -6,7 +6,8 @@

 namespace Service::VI {

-Layer::Layer(u64 id, NVFlinger::BufferQueue& queue) : id{id}, buffer_queue{queue} {}
+Layer::Layer(u64 id, std::shared_ptr<NVFlinger::BufferQueue> queue)
+    : id{id}, buffer_queue{std::move(queue)} {}

 Layer::~Layer() = default;

--- a/src/core/hle/service/vi/layer/vi_layer.h
+++ b/src/core/hle/service/vi/layer/vi_layer.h
@@ -4,6 +4,8 @@

 #pragma once

+#include <memory>
+
 #include "common/common_types.h"

 namespace Service::NVFlinger {
@@ -12,41 +14,12 @@ class BufferQueue;

 namespace Service::VI {

-/// Represents a single display layer.
-class Layer {
-public:
-    /// Constructs a layer with a given ID and buffer queue.
-    ///
-    /// @param id    The ID to assign to this layer.
-    /// @param queue The buffer queue for this layer to use.
-    ///
-    Layer(u64 id, NVFlinger::BufferQueue& queue);
+struct Layer {
+    Layer(u64 id, std::shared_ptr<NVFlinger::BufferQueue> queue);
    ~Layer();

-    Layer(const Layer&) = delete;
-    Layer& operator=(const Layer&) = delete;
-
-    Layer(Layer&&) = default;
-    Layer& operator=(Layer&&) = delete;
-
-    /// Gets the ID for this layer.
-    u64 GetID() const {
-        return id;
-    }
-
-    /// Gets a reference to the buffer queue this layer is using.
-    NVFlinger::BufferQueue& GetBufferQueue() {
-        return buffer_queue;
-    }
-
-    /// Gets a const reference to the buffer queue this layer is using.
-    const NVFlinger::BufferQueue& GetBufferQueue() const {
-        return buffer_queue;
-    }
-
-private:
    u64 id;
-    NVFlinger::BufferQueue& buffer_queue;
+    std::shared_ptr<NVFlinger::BufferQueue> buffer_queue;
 };

 } // namespace Service::VI
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -420,7 +420,7 @@ public:
        u32_le fence_is_valid;
        std::array<Fence, 2> fences;

-        Common::Rectangle<int> GetCropRect() const {
+        MathUtil::Rectangle<int> GetCropRect() const {
            return {crop_left, crop_top, crop_right, crop_bottom};
        }
    };
@@ -525,7 +525,7 @@ private:
        LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
                  static_cast<u32>(transaction), flags);

-        auto& buffer_queue = nv_flinger->FindBufferQueue(id);
+        auto buffer_queue = nv_flinger->FindBufferQueue(id);

        if (transaction == TransactionId::Connect) {
            IGBPConnectRequestParcel request{ctx.ReadBuffer()};
@@ -538,7 +538,7 @@ private:
        } else if (transaction == TransactionId::SetPreallocatedBuffer) {
            IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()};

-            buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer);
+            buffer_queue->SetPreallocatedBuffer(request.data.slot, request.buffer);

            IGBPSetPreallocatedBufferResponseParcel response{};
            ctx.WriteBuffer(response.Serialize());
@@ -546,7 +546,7 @@ private:
            IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
            const u32 width{request.data.width};
            const u32 height{request.data.height};
-            std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
+            std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);

            if (slot) {
                // Buffer is available
@@ -559,8 +559,8 @@ private:
                    [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
                        Kernel::ThreadWakeupReason reason) {
                        // Repeat TransactParcel DequeueBuffer when a buffer is available
-                        auto& buffer_queue = nv_flinger->FindBufferQueue(id);
-                        std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
+                        auto buffer_queue = nv_flinger->FindBufferQueue(id);
+                        std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
                        ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer.");

                        IGBPDequeueBufferResponseParcel response{*slot};
@@ -568,28 +568,28 @@ private:
                        IPC::ResponseBuilder rb{ctx, 2};
                        rb.Push(RESULT_SUCCESS);
                    },
-                    buffer_queue.GetWritableBufferWaitEvent());
+                    buffer_queue->GetWritableBufferWaitEvent());
            }
        } else if (transaction == TransactionId::RequestBuffer) {
            IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};

-            auto& buffer = buffer_queue.RequestBuffer(request.slot);
+            auto& buffer = buffer_queue->RequestBuffer(request.slot);

            IGBPRequestBufferResponseParcel response{buffer};
            ctx.WriteBuffer(response.Serialize());
        } else if (transaction == TransactionId::QueueBuffer) {
            IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};

-            buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
-                                     request.data.GetCropRect());
+            buffer_queue->QueueBuffer(request.data.slot, request.data.transform,
+                                      request.data.GetCropRect());

            IGBPQueueBufferResponseParcel response{1280, 720};
            ctx.WriteBuffer(response.Serialize());
        } else if (transaction == TransactionId::Query) {
            IGBPQueryRequestParcel request{ctx.ReadBuffer()};

-            const u32 value =
-                buffer_queue.Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));
+            u32 value =
+                buffer_queue->Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));

            IGBPQueryResponseParcel response{value};
            ctx.WriteBuffer(response.Serialize());
@@ -629,12 +629,12 @@ private:

        LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown);

-        const auto& buffer_queue = nv_flinger->FindBufferQueue(id);
+        const auto buffer_queue = nv_flinger->FindBufferQueue(id);

        // TODO(Subv): Find out what this actually is.
        IPC::ResponseBuilder rb{ctx, 2, 1};
        rb.Push(RESULT_SUCCESS);
-        rb.PushCopyObjects(buffer_queue.GetBufferWaitEvent());
+        rb.PushCopyObjects(buffer_queue->GetBufferWaitEvent());
    }

    std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
@@ -752,7 +752,6 @@ public:
            {1102, nullptr, "GetDisplayResolution"},
            {2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"},
            {2011, nullptr, "DestroyManagedLayer"},
-            {2012, nullptr, "CreateStrayLayer"},
            {2050, nullptr, "CreateIndirectLayer"},
            {2051, nullptr, "DestroyIndirectLayer"},
            {2052, nullptr, "CreateIndirectProducerEndPoint"},
--- a/src/input_common/motion_emu.cpp
+++ b/src/input_common/motion_emu.cpp
@@ -32,12 +32,12 @@ public:
    }

    void BeginTilt(int x, int y) {
-        mouse_origin = Common::MakeVec(x, y);
+        mouse_origin = Math::MakeVec(x, y);
        is_tilting = true;
    }

    void Tilt(int x, int y) {
-        auto mouse_move = Common::MakeVec(x, y) - mouse_origin;
+        auto mouse_move = Math::MakeVec(x, y) - mouse_origin;
        if (is_tilting) {
            std::lock_guard<std::mutex> guard(tilt_mutex);
            if (mouse_move.x == 0 && mouse_move.y == 0) {
@@ -45,7 +45,7 @@ public:
            } else {
                tilt_direction = mouse_move.Cast<float>();
                tilt_angle =
-                    std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, Common::PI * 0.5f);
+                    std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, MathUtil::PI * 0.5f);
            }
        }
    }
@@ -56,7 +56,7 @@ public:
        is_tilting = false;
    }

-    std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() {
+    std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() {
        std::lock_guard<std::mutex> guard(status_mutex);
        return status;
    }
@@ -66,17 +66,17 @@ private:
    const std::chrono::steady_clock::duration update_duration;
    const float sensitivity;

-    Common::Vec2<int> mouse_origin;
+    Math::Vec2<int> mouse_origin;

    std::mutex tilt_mutex;
-    Common::Vec2<float> tilt_direction;
+    Math::Vec2<float> tilt_direction;
    float tilt_angle = 0;

    bool is_tilting = false;

    Common::Event shutdown_event;

-    std::tuple<Common::Vec3<float>, Common::Vec3<float>> status;
+    std::tuple<Math::Vec3<float>, Math::Vec3<float>> status;
    std::mutex status_mutex;

    // Note: always keep the thread declaration at the end so that other objects are initialized
@@ -85,8 +85,8 @@ private:

    void MotionEmuThread() {
        auto update_time = std::chrono::steady_clock::now();
-        Common::Quaternion<float> q = Common::MakeQuaternion(Common::Vec3<float>(), 0);
-        Common::Quaternion<float> old_q;
+        Math::Quaternion<float> q = MakeQuaternion(Math::Vec3<float>(), 0);
+        Math::Quaternion<float> old_q;

        while (!shutdown_event.WaitUntil(update_time)) {
            update_time += update_duration;
@@ -96,18 +96,18 @@ private:
                std::lock_guard<std::mutex> guard(tilt_mutex);

                // Find the quaternion describing current 3DS tilting
-                q = Common::MakeQuaternion(
-                    Common::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), tilt_angle);
+                q = MakeQuaternion(Math::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x),
+                                   tilt_angle);
            }

            auto inv_q = q.Inverse();

            // Set the gravity vector in world space
-            auto gravity = Common::MakeVec(0.0f, -1.0f, 0.0f);
+            auto gravity = Math::MakeVec(0.0f, -1.0f, 0.0f);

            // Find the angular rate vector in world space
            auto angular_rate = ((q - old_q) * inv_q).xyz * 2;
-            angular_rate *= 1000 / update_millisecond / Common::PI * 180;
+            angular_rate *= 1000 / update_millisecond / MathUtil::PI * 180;

            // Transform the two vectors from world space to 3DS space
            gravity = QuaternionRotate(inv_q, gravity);
@@ -131,7 +131,7 @@ public:
        device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity);
    }

-    std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const override {
+    std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() const override {
        return device->GetStatus();
    }

--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -111,7 +111,9 @@ if (ENABLE_VULKAN)
        renderer_vulkan/vk_resource_manager.cpp
        renderer_vulkan/vk_resource_manager.h
        renderer_vulkan/vk_scheduler.cpp
-        renderer_vulkan/vk_scheduler.h)
+        renderer_vulkan/vk_scheduler.h
+        renderer_vulkan/vk_stream_buffer.cpp
+        renderer_vulkan/vk_stream_buffer.h)

    target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
    target_compile_definitions(video_core PRIVATE HAS_VULKAN)
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -33,36 +33,18 @@ void DmaPusher::DispatchCalls() {
 }

 bool DmaPusher::Step() {
-    if (!ib_enable || dma_pushbuffer.empty()) {
-        // pushbuffer empty and IB empty or nonexistent - nothing to do
-        return false;
-    }
+    if (dma_get != dma_put) {
+        // Push buffer non-empty, read a word
+        const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
+        ASSERT_MSG(address, "Invalid GPU address");

-    const CommandList& command_list{dma_pushbuffer.front()};
-    const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
-    GPUVAddr dma_get = command_list_header.addr;
-    GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
-    bool non_main = command_list_header.is_non_main;
+        const CommandHeader command_header{Memory::Read32(*address)};

-    if (dma_pushbuffer_subindex >= command_list.size()) {
-        // We've gone through the current list, remove it from the queue
-        dma_pushbuffer.pop();
-        dma_pushbuffer_subindex = 0;
-    }
+        dma_get += sizeof(u32);

-    if (command_list_header.size == 0) {
-        return true;
-    }
-
-    // Push buffer non-empty, read a word
-    const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
-    ASSERT_MSG(address, "Invalid GPU address");
-
-    command_headers.resize(command_list_header.size);
-
-    Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32));
-
-    for (const CommandHeader& command_header : command_headers) {
+        if (!non_main) {
+            dma_mget = dma_get;
+        }

        // now, see if we're in the middle of a command
        if (dma_state.length_pending) {
@@ -109,11 +91,22 @@ bool DmaPusher::Step() {
                break;
            }
        }
-    }
+    } else if (ib_enable && !dma_pushbuffer.empty()) {
+        // Current pushbuffer empty, but we have more IB entries to read
+        const CommandList& command_list{dma_pushbuffer.front()};
+        const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
+        dma_get = command_list_header.addr;
+        dma_put = dma_get + command_list_header.size * sizeof(u32);
+        non_main = command_list_header.is_non_main;

-    if (!non_main) {
-        // TODO (degasus): This is dead code, as dma_mget is never read.
-        dma_mget = dma_put;
+        if (dma_pushbuffer_subindex >= command_list.size()) {
+            // We've gone through the current list, remove it from the queue
+            dma_pushbuffer.pop();
+            dma_pushbuffer_subindex = 0;
+        }
+    } else {
+        // Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
+        return {};
    }

    return true;
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -75,8 +75,6 @@ private:

    GPU& gpu;

-    std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once
-
    std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
    std::size_t dma_pushbuffer_subindex{};  ///< Index within a command list within the pushbuffer

@@ -91,8 +89,11 @@ private:
    DmaState dma_state{};
    bool dma_increment_once{};

+    GPUVAddr dma_put{};   ///< pushbuffer current end address
+    GPUVAddr dma_get{};   ///< pushbuffer current read address
    GPUVAddr dma_mget{};  ///< main pushbuffer last read address
    bool ib_enable{true}; ///< IB mode enabled
+    bool non_main{};      ///< non-main pushbuffer active
 };

 } // namespace Tegra
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -44,10 +44,10 @@ void Fermi2D::HandleSurfaceCopy() {
    const u32 src_blit_y2{
        static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)};

-    const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
-    const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
-                                          regs.blit_dst_x + regs.blit_dst_width,
-                                          regs.blit_dst_y + regs.blit_dst_height};
+    const MathUtil::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
+    const MathUtil::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
+                                            regs.blit_dst_x + regs.blit_dst_width,
+                                            regs.blit_dst_y + regs.blit_dst_height};

    if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) {
        UNIMPLEMENTED();
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -503,7 +503,7 @@ public:
            f32 translate_z;
            INSERT_PADDING_WORDS(2);

-            Common::Rectangle<s32> GetRect() const {
+            MathUtil::Rectangle<s32> GetRect() const {
                return {
                    GetX(),               // left
                    GetY() + GetHeight(), // top
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -376,9 +376,9 @@ enum class R2pMode : u64 {
 };

 enum class IpaInterpMode : u64 {
-    Pass = 0,
-    Multiply = 1,
-    Constant = 2,
+    Linear = 0,
+    Perspective = 1,
+    Flat = 2,
    Sc = 3,
 };

--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -16,13 +16,6 @@ enum class OutputTopology : u32 {
    TriangleStrip = 7,
 };

-enum class AttributeUse : u8 {
-    Unused = 0,
-    Constant = 1,
-    Perspective = 2,
-    ScreenLinear = 3,
-};
-
 // Documentation in:
 // http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
 struct Header {
@@ -91,15 +84,9 @@ struct Header {
        } vtg;

        struct {
-            INSERT_PADDING_BYTES(3); // ImapSystemValuesA
-            INSERT_PADDING_BYTES(1); // ImapSystemValuesB
-            union {
-                BitField<0, 2, AttributeUse> x;
-                BitField<2, 2, AttributeUse> y;
-                BitField<4, 2, AttributeUse> w;
-                BitField<6, 2, AttributeUse> z;
-                u8 raw;
-            } imap_generic_vector[32];
+            INSERT_PADDING_BYTES(3);  // ImapSystemValuesA
+            INSERT_PADDING_BYTES(1);  // ImapSystemValuesB
+            INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
            INSERT_PADDING_BYTES(2);  // ImapColor
            INSERT_PADDING_BYTES(2);  // ImapSystemValuesC
            INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
@@ -116,28 +103,6 @@ struct Header {
                const u32 bit = render_target * 4 + component;
                return omap.target & (1 << bit);
            }
-            AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
-                return static_cast<AttributeUse>(
-                    (imap_generic_vector[attribute].raw >> (index * 2)) & 0x03);
-            }
-            AttributeUse GetAttributeUse(u32 attribute) const {
-                AttributeUse result = AttributeUse::Unused;
-                for (u32 i = 0; i < 4; i++) {
-                    const auto index = GetAttributeIndexUse(attribute, i);
-                    if (index == AttributeUse::Unused) {
-                        continue;
-                    }
-                    if (result == AttributeUse::Unused || result == index) {
-                        result = index;
-                        continue;
-                    }
-                    LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
-                    if (index == AttributeUse::Perspective) {
-                        result = index;
-                    }
-                }
-                return result;
-            }
        } ps;
    };

--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -100,7 +100,7 @@ struct FramebufferConfig {

    using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
    TransformFlags transform_flags;
-    Common::Rectangle<int> crop_rect;
+    MathUtil::Rectangle<int> crop_rect;
 };

 namespace Engines {
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -47,8 +47,8 @@ public:
    /// Attempt to use a faster method to perform a surface copy
    virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                                       const Tegra::Engines::Fermi2D::Regs::Surface& dst,
-                                       const Common::Rectangle<u32>& src_rect,
-                                       const Common::Rectangle<u32>& dst_rect) {
+                                       const MathUtil::Rectangle<u32>& src_rect,
+                                       const MathUtil::Rectangle<u32>& dst_rect) {
        return false;
    }

--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -779,8 +779,8 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {

 bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                                             const Tegra::Engines::Fermi2D::Regs::Surface& dst,
-                                             const Common::Rectangle<u32>& src_rect,
-                                             const Common::Rectangle<u32>& dst_rect) {
+                                             const MathUtil::Rectangle<u32>& src_rect,
+                                             const MathUtil::Rectangle<u32>& dst_rect) {
    MICROPROFILE_SCOPE(OpenGL_Blits);
    res_cache.FermiCopySurface(src, dst, src_rect, dst_rect);
    return true;
@@ -1034,7 +1034,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
    for (std::size_t i = 0; i < viewport_count; i++) {
        auto& viewport = current_state.viewports[i];
        const auto& src = regs.viewports[i];
-        const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
+        const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
        viewport.x = viewport_rect.left;
        viewport.y = viewport_rect.bottom;
        viewport.width = viewport_rect.GetWidth();
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -62,8 +62,8 @@ public:
    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
    bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                               const Tegra::Engines::Fermi2D::Regs::Surface& dst,
-                               const Common::Rectangle<u32>& src_rect,
-                               const Common::Rectangle<u32>& dst_rect) override;
+                               const MathUtil::Rectangle<u32>& src_rect,
+                               const MathUtil::Rectangle<u32>& dst_rect) override;
    bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                           u32 pixel_stride) override;
    bool AccelerateDrawBatch(bool is_indexed) override;
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -399,7 +399,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
    return format;
 }

-Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
+MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
    u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
    if (IsPixelFormatASTC(pixel_format)) {
        // ASTC formats must stop at the ATSC block size boundary
@@ -1062,8 +1062,8 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
 }

 static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
-                        const Common::Rectangle<u32>& src_rect,
-                        const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
+                        const MathUtil::Rectangle<u32>& src_rect,
+                        const MathUtil::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
                        GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0,
                        std::size_t cubemap_face = 0) {

@@ -1193,7 +1193,7 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
 void RasterizerCacheOpenGL::FermiCopySurface(
    const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
    const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
-    const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) {
+    const MathUtil::Rectangle<u32>& src_rect, const MathUtil::Rectangle<u32>& dst_rect) {

    const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
    const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
@@ -1257,11 +1257,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
    case SurfaceTarget::TextureCubemap:
    case SurfaceTarget::Texture2DArray:
    case SurfaceTarget::TextureCubeArray:
-        if (old_params.pixel_format == new_params.pixel_format)
-            FastLayeredCopySurface(old_surface, new_surface);
-        else {
-            AccurateCopySurface(old_surface, new_surface);
-        }
+        FastLayeredCopySurface(old_surface, new_surface);
        break;
    default:
        LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -28,7 +28,7 @@ namespace OpenGL {

 class CachedSurface;
 using Surface = std::shared_ptr<CachedSurface>;
-using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
+using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;

 using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
 using SurfaceType = VideoCore::Surface::SurfaceType;
@@ -71,7 +71,7 @@ struct SurfaceParams {
    }

    /// Returns the rectangle corresponding to this surface
-    Common::Rectangle<u32> GetRect(u32 mip_level = 0) const;
+    MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const;

    /// Returns the total size of this surface in bytes, adjusted for compression
    std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
@@ -430,8 +430,8 @@ public:
    /// Copies the contents of one surface to another
    void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
                          const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
-                          const Common::Rectangle<u32>& src_rect,
-                          const Common::Rectangle<u32>& dst_rect);
+                          const MathUtil::Rectangle<u32>& src_rect,
+                          const MathUtil::Rectangle<u32>& dst_rect);

 private:
    void LoadSurface(const Surface& surface);
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -20,7 +20,6 @@
 namespace OpenGL::GLShader {

 using Tegra::Shader::Attribute;
-using Tegra::Shader::AttributeUse;
 using Tegra::Shader::Header;
 using Tegra::Shader::IpaInterpMode;
 using Tegra::Shader::IpaMode;
@@ -289,22 +288,34 @@ private:
        code.AddNewLine();
    }

-    std::string GetInputFlags(AttributeUse attribute) {
+    std::string GetInputFlags(const IpaMode& input_mode) {
+        const IpaSampleMode sample_mode = input_mode.sampling_mode;
+        const IpaInterpMode interp_mode = input_mode.interpolation_mode;
        std::string out;

-        switch (attribute) {
-        case AttributeUse::Constant:
+        switch (interp_mode) {
+        case IpaInterpMode::Flat:
            out += "flat ";
            break;
-        case AttributeUse::ScreenLinear:
+        case IpaInterpMode::Linear:
            out += "noperspective ";
            break;
-        case AttributeUse::Perspective:
+        case IpaInterpMode::Perspective:
            // Default, Smooth
            break;
        default:
-            LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
-            UNREACHABLE();
+            UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode));
+        }
+        switch (sample_mode) {
+        case IpaSampleMode::Centroid:
+            // It can be implemented with the "centroid " keyword in GLSL
+            UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid");
+            break;
+        case IpaSampleMode::Default:
+            // Default, n/a
+            break;
+        default:
+            UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode));
        }
        return out;
    }
@@ -313,11 +324,16 @@ private:
        const auto& attributes = ir.GetInputAttributes();
        for (const auto element : attributes) {
            const Attribute::Index index = element.first;
+            const IpaMode& input_mode = *element.second.begin();
            if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
                // Skip when it's not a generic attribute
                continue;
            }

+            ASSERT(element.second.size() > 0);
+            UNIMPLEMENTED_IF_MSG(element.second.size() > 1,
+                                 "Multiple input flag modes are not supported in GLSL");
+
            // TODO(bunnei): Use proper number of elements for these
            u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
            if (stage != ShaderStage::Vertex) {
@@ -329,14 +345,8 @@ private:
            if (stage == ShaderStage::Geometry) {
                attr = "gs_" + attr + "[]";
            }
-            std::string suffix;
-            if (stage == ShaderStage::Fragment) {
-                const auto input_mode =
-                    header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION);
-                suffix = GetInputFlags(input_mode);
-            }
-            code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " +
-                         attr + ';');
+            code.AddLine("layout (location = " + std::to_string(idx) + ") " +
+                         GetInputFlags(input_mode) + "in vec4 " + attr + ';');
        }
        if (!attributes.empty())
            code.AddNewLine();
@@ -1574,4 +1584,4 @@ ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const st
    return {decompiler.GetResult(), decompiler.GetShaderEntries()};
 }

-} // namespace OpenGL::GLShader
+} // namespace OpenGL::GLShader
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#pragma once
+
 #include <cstring>
 #include <fmt/format.h>
 #include <lz4.h>
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -124,7 +124,7 @@ layout (location = 5) out vec4 FragColor5;
 layout (location = 6) out vec4 FragColor6;
 layout (location = 7) out vec4 FragColor7;

-layout (location = 0) in noperspective vec4 position;
+layout (location = 0) in vec4 position;

 layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
    vec4 viewport_flip;
@@ -172,4 +172,4 @@ void main() {
    return {out, program.second};
 }

-} // namespace OpenGL::GLShader
+} // namespace OpenGL::GLShader
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -257,7 +257,6 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
                                                 const Tegra::FramebufferConfig& framebuffer) {
    texture.width = framebuffer.width;
    texture.height = framebuffer.height;
-    texture.pixel_format = framebuffer.pixel_format;

    GLint internal_format;
    switch (framebuffer.pixel_format) {
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -39,7 +39,7 @@ struct TextureInfo {
 /// Structure used for storing information about the display target for the Switch screen
 struct ScreenInfo {
    GLuint display_texture;
-    const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
+    const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
    TextureInfo texture;
 };

@@ -102,7 +102,7 @@ private:

    /// Used for transforming the framebuffer orientation
    Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
-    Common::Rectangle<int> framebuffer_crop_rect;
+    MathUtil::Rectangle<int> framebuffer_crop_rect;
 };

 } // namespace OpenGL
--- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
@@ -125,11 +125,12 @@ void VKFence::Protect(VKResource* resource) {
    protected_resources.push_back(resource);
 }

-void VKFence::Unprotect(const VKResource* resource) {
+void VKFence::Unprotect(VKResource* resource) {
    const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
-    if (it != protected_resources.end()) {
-        protected_resources.erase(it);
-    }
+    ASSERT(it != protected_resources.end());
+
+    resource->OnFenceRemoval(this);
+    protected_resources.erase(it);
 }

 VKFenceWatch::VKFenceWatch() = default;
@@ -141,12 +142,11 @@ VKFenceWatch::~VKFenceWatch() {
 }

 void VKFenceWatch::Wait() {
-    if (!fence) {
+    if (fence == nullptr) {
        return;
    }
    fence->Wait();
    fence->Unprotect(this);
-    fence = nullptr;
 }

 void VKFenceWatch::Watch(VKFence& new_fence) {
--- a/src/video_core/renderer_vulkan/vk_resource_manager.h
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.h
@@ -63,7 +63,7 @@ public:
    void Protect(VKResource* resource);

    /// Removes protection for a resource.
-    void Unprotect(const VKResource* resource);
+    void Unprotect(VKResource* resource);

    /// Retreives the fence.
    operator vk::Fence() const {
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -0,0 +1,90 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+#include <optional>
+#include <vector>
+
+#include "common/assert.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_stream_buffer.h"
+
+namespace Vulkan {
+
+constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
+constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
+
+VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
+                               VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
+                               vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
+    : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
+                                                                                   pipeline_stage} {
+    CreateBuffers(memory_manager, usage);
+    ReserveWatches(WATCHES_INITIAL_RESERVE);
+}
+
+VKStreamBuffer::~VKStreamBuffer() = default;
+
+std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
+    ASSERT(size <= buffer_size);
+    mapped_size = size;
+
+    if (offset + size > buffer_size) {
+        // The buffer would overflow, save the amount of used watches, signal an invalidation and
+        // reset the state.
+        invalidation_mark = used_watches;
+        used_watches = 0;
+        offset = 0;
+    }
+
+    return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
+}
+
+VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
+    ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
+
+    if (invalidation_mark) {
+        // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
+        exctx = scheduler.Flush();
+        std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
+                      [&](auto& resource) { resource->Wait(); });
+        invalidation_mark = std::nullopt;
+    }
+
+    if (used_watches + 1 >= watches.size()) {
+        // Ensure that there are enough watches.
+        ReserveWatches(WATCHES_RESERVE_CHUNK);
+    }
+    // Add a watch for this allocation.
+    watches[used_watches++]->Watch(exctx.GetFence());
+
+    offset += size;
+
+    return exctx;
+}
+
+void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
+    const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
+                                         nullptr);
+
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
+    commit = memory_manager.Commit(*buffer, true);
+    mapped_pointer = commit->GetData();
+}
+
+void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
+    const std::size_t previous_size = watches.size();
+    watches.resize(previous_size + grow_size);
+    std::generate(watches.begin() + previous_size, watches.end(),
+                  []() { return std::make_unique<VKFenceWatch>(); });
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -0,0 +1,72 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <tuple>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKFence;
+class VKFenceWatch;
+class VKResourceManager;
+class VKScheduler;
+
+class VKStreamBuffer {
+public:
+    explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
+                            VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
+                            vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
+    ~VKStreamBuffer();
+
+    /**
+     * Reserves a region of memory from the stream buffer.
+     * @param size Size to reserve.
+     * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
+     * offset and a boolean that's true when the buffer has been invalidated.
+     */
+    std::tuple<u8*, u64, bool> Reserve(u64 size);
+
+    /// Ensures that "size" bytes of memory are available to the GPU.
+    [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size);
+
+    vk::Buffer GetBuffer() const {
+        return *buffer;
+    }
+
+private:
+    /// Creates Vulkan buffer handles, committing the required memory.
+    void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
+
+    /// Increases the amount of watches available.
+    void ReserveWatches(std::size_t grow_size);
+
+    const VKDevice& device;                      ///< Vulkan device manager.
+    VKScheduler& scheduler;                      ///< Command scheduler.
+    const u64 buffer_size;                       ///< Total size of the stream buffer.
+    const vk::AccessFlags access;                ///< Access usage of this stream buffer.
+    const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
+
+    UniqueBuffer buffer;   ///< Mapped buffer.
+    VKMemoryCommit commit; ///< Memory commit.
+    u8* mapped_pointer{};  ///< Pointer to the host visible commit
+
+    u64 offset{};      ///< Buffer iterator.
+    u64 mapped_size{}; ///< Size reserved for the current copy.
+
+    std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
+    std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
+    std::optional<std::size_t>
+        invalidation_mark{}; ///< Number of watches used in the current invalidation.
+};
+
+} // namespace Vulkan
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -48,7 +48,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
                             "Unaligned attribute loads are not supported");

-        Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
+        Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
                                          Tegra::Shader::IpaSampleMode::Default};

        u64 next_element = instr.attribute.fmt20.element;
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -135,18 +135,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
                                                instr.ipa.sample_mode.Value()};

        const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
-        Node value = attr;
-        const Tegra::Shader::Attribute::Index index = attribute.index.Value();
-        if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
-            index <= Tegra::Shader::Attribute::Index::Attribute_31) {
-            // TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
-            // In theory by setting them as perspective, OpenGL does the perspective correction.
-            // A way must figured to reverse the last step of it.
-            if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
-                value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
-            }
-        }
-        value = GetSaturatedFloat(value, instr.ipa.saturate);
+        const Node value = GetSaturatedFloat(attr, instr.ipa.saturate);

        SetRegister(bb, instr.gpr0, value);
        break;
@@ -186,4 +175,4 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -20,9 +20,9 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
                return {node, cursor};
        }
        if (const auto conditional = std::get_if<ConditionalNode>(node)) {
-            const auto& conditional_code = conditional->GetCode();
-            const auto [found, internal_cursor] = FindOperation(
-                conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
+            const auto& code = conditional->GetCode();
+            const auto [found, internal_cursor] =
+                FindOperation(code, static_cast<s64>(code.size() - 1), operation_code);
            if (found)
                return {found, cursor};
        }
@@ -58,8 +58,8 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
        return nullptr;
    }
    if (const auto conditional = std::get_if<ConditionalNode>(tracked)) {
-        const auto& conditional_code = conditional->GetCode();
-        return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
+        const auto& code = conditional->GetCode();
+        return TrackCbuf(tracked, code, static_cast<s64>(code.size()));
    }
    return nullptr;
 }
--- a/src/yuzu/debugger/graphics/graphics_surface.cpp
+++ b/src/yuzu/debugger/graphics/graphics_surface.cpp
@@ -398,7 +398,7 @@ void GraphicsSurfaceWidget::OnUpdate() {

    for (unsigned int y = 0; y < surface_height; ++y) {
        for (unsigned int x = 0; x < surface_width; ++x) {
-            Common::Vec4<u8> color;
+            Math::Vec4<u8> color;
            color[0] = texture_data[x + y * surface_width + 0];
            color[1] = texture_data[x + y * surface_width + 1];
            color[2] = texture_data[x + y * surface_width + 2];