gl_rasterizer: Remove texture unbinding after dispatching a draw call

Unbinding was required when OpenGL delete operations didn't unbind a resource if it was bound. This is no longer needed and can be removed.
gl_state: Fixup multibind bug
2019-02-28 00:17:50 -03:00 · 2019-02-28 00:17:03 -03:00 · 2019-02-27 21:20:06 -05:00 · 2019-02-27 21:19:15 -05:00 · 2019-02-27 21:17:55 -05:00 · 2019-02-27 21:14:20 -05:00
78 changed files with 1561 additions and 478 deletions
--- a/externals/Vulkan-Headers
+++ b/externals/Vulkan-Headers
--- a/externals/cubeb
+++ b/externals/cubeb
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -46,16 +46,18 @@ struct AudioRendererParameter {
    u32_le sample_rate;
    u32_le sample_count;
    u32_le mix_buffer_count;
-    u32_le unknown_c;
+    u32_le submix_count;
    u32_le voice_count;
    u32_le sink_count;
    u32_le effect_count;
-    u32_le unknown_1c;
-    u8 unknown_20;
-    INSERT_PADDING_BYTES(3);
+    u32_le performance_frame_count;
+    u8 is_voice_drop_enabled;
+    u8 unknown_21;
+    u8 unknown_22;
+    u8 execution_mode;
    u32_le splitter_count;
-    u32_le unknown_2c;
-    INSERT_PADDING_WORDS(1);
+    u32_le num_splitter_send_channels;
+    u32_le unknown_30;
    u32_le revision;
 };
 static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size");
--- a/src/audio_core/codec.cpp
+++ b/src/audio_core/codec.cpp
@@ -68,8 +68,8 @@ std::vector<s16> DecodeADPCM(const u8* const data, std::size_t size, const ADPCM
        }
    }

-    state.yn1 = yn1;
-    state.yn2 = yn2;
+    state.yn1 = static_cast<s16>(yn1);
+    state.yn2 = static_cast<s16>(yn2);

    return ret;
 }
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -12,6 +12,10 @@
 #include "common/ring_buffer.h"
 #include "core/settings.h"

+#ifdef _MSC_VER
+#include <objbase.h>
+#endif
+
 namespace AudioCore {

 class CubebSinkStream final : public SinkStream {
@@ -46,7 +50,7 @@ public:
        }
    }

-    ~CubebSinkStream() {
+    ~CubebSinkStream() override {
        if (!ctx) {
            return;
        }
@@ -75,11 +79,11 @@ public:
        queue.Push(samples);
    }

-    std::size_t SamplesInQueue(u32 num_channels) const override {
+    std::size_t SamplesInQueue(u32 channel_count) const override {
        if (!ctx)
            return 0;

-        return queue.Size() / num_channels;
+        return queue.Size() / channel_count;
    }

    void Flush() override {
@@ -98,7 +102,7 @@ private:
    u32 num_channels{};

    Common::RingBuffer<s16, 0x10000> queue;
-    std::array<s16, 2> last_frame;
+    std::array<s16, 2> last_frame{};
    std::atomic<bool> should_flush{};
    TimeStretcher time_stretch;

@@ -108,6 +112,11 @@ private:
 };

 CubebSink::CubebSink(std::string_view target_device_name) {
+    // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
+#ifdef _MSC_VER
+    com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
+#endif
+
    if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) {
        LOG_CRITICAL(Audio_Sink, "cubeb_init failed");
        return;
@@ -142,6 +151,12 @@ CubebSink::~CubebSink() {
    }

    cubeb_destroy(ctx);
+
+#ifdef _MSC_VER
+    if (SUCCEEDED(com_init_result)) {
+        CoUninitialize();
+    }
+#endif
 }

 SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,
--- a/src/audio_core/cubeb_sink.h
+++ b/src/audio_core/cubeb_sink.h
@@ -25,6 +25,10 @@ private:
    cubeb* ctx{};
    cubeb_devid output_device{};
    std::vector<SinkStreamPtr> sink_streams;
+
+#ifdef _MSC_VER
+    u32 com_init_result = 0;
+#endif
 };

 std::vector<std::string> ListCubebSinkDevices();
--- a/src/common/color.h
+++ b/src/common/color.h
@@ -55,36 +55,36 @@ constexpr u8 Convert8To6(u8 value) {
 /**
 * Decode a color stored in RGBA8 format
 * @param bytes Pointer to encoded source color
- * @return Result color decoded as Math::Vec4<u8>
+ * @return Result color decoded as Common::Vec4<u8>
 */
-inline Math::Vec4<u8> DecodeRGBA8(const u8* bytes) {
+inline Common::Vec4<u8> DecodeRGBA8(const u8* bytes) {
    return {bytes[3], bytes[2], bytes[1], bytes[0]};
 }

 /**
 * Decode a color stored in RGB8 format
 * @param bytes Pointer to encoded source color
- * @return Result color decoded as Math::Vec4<u8>
+ * @return Result color decoded as Common::Vec4<u8>
 */
-inline Math::Vec4<u8> DecodeRGB8(const u8* bytes) {
+inline Common::Vec4<u8> DecodeRGB8(const u8* bytes) {
    return {bytes[2], bytes[1], bytes[0], 255};
 }

 /**
 * Decode a color stored in RG8 (aka HILO8) format
 * @param bytes Pointer to encoded source color
- * @return Result color decoded as Math::Vec4<u8>
+ * @return Result color decoded as Common::Vec4<u8>
 */
-inline Math::Vec4<u8> DecodeRG8(const u8* bytes) {
+inline Common::Vec4<u8> DecodeRG8(const u8* bytes) {
    return {bytes[1], bytes[0], 0, 255};
 }

 /**
 * Decode a color stored in RGB565 format
 * @param bytes Pointer to encoded source color
- * @return Result color decoded as Math::Vec4<u8>
+ * @return Result color decoded as Common::Vec4<u8>
 */
-inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
+inline Common::Vec4<u8> DecodeRGB565(const u8* bytes) {
    u16_le pixel;
    std::memcpy(&pixel, bytes, sizeof(pixel));
    return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F),
@@ -94,9 +94,9 @@ inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
 /**
 * Decode a color stored in RGB5A1 format
 * @param bytes Pointer to encoded source color
- * @return Result color decoded as Math::Vec4<u8>
+ * @return Result color decoded as Common::Vec4<u8>
 */
-inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
+inline Common::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
    u16_le pixel;
    std::memcpy(&pixel, bytes, sizeof(pixel));
    return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F),
@@ -106,9 +106,9 @@ inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
 /**
 * Decode a color stored in RGBA4 format
 * @param bytes Pointer to encoded source color
- * @return Result color decoded as Math::Vec4<u8>
+ * @return Result color decoded as Common::Vec4<u8>
 */
-inline Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
+inline Common::Vec4<u8> DecodeRGBA4(const u8* bytes) {
    u16_le pixel;
    std::memcpy(&pixel, bytes, sizeof(pixel));
    return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF),
@@ -138,9 +138,9 @@ inline u32 DecodeD24(const u8* bytes) {
 /**
 * Decode a depth value and a stencil value stored in D24S8 format
 * @param bytes Pointer to encoded source values
- * @return Resulting values stored as a Math::Vec2
+ * @return Resulting values stored as a Common::Vec2
 */
-inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
+inline Common::Vec2<u32> DecodeD24S8(const u8* bytes) {
    return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]};
 }

@@ -149,7 +149,7 @@ inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGBA8(const Common::Vec4<u8>& color, u8* bytes) {
    bytes[3] = color.r();
    bytes[2] = color.g();
    bytes[1] = color.b();
@@ -161,7 +161,7 @@ inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGB8(const Common::Vec4<u8>& color, u8* bytes) {
    bytes[2] = color.r();
    bytes[1] = color.g();
    bytes[0] = color.b();
@@ -172,7 +172,7 @@ inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) {
    bytes[1] = color.r();
    bytes[0] = color.g();
 }
@@ -181,7 +181,7 @@ inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGB565(const Common::Vec4<u8>& color, u8* bytes) {
    const u16_le data =
        (Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b());

@@ -193,7 +193,7 @@ inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGB5A1(const Common::Vec4<u8>& color, u8* bytes) {
    const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) |
                        (Convert8To5(color.b()) << 1) | Convert8To1(color.a());

@@ -205,7 +205,7 @@ inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
 * @param color Source color to encode
 * @param bytes Destination pointer to store encoded color
 */
-inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) {
+inline void EncodeRGBA4(const Common::Vec4<u8>& color, u8* bytes) {
    const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) |
                     (Convert8To4(color.b()) << 4) | Convert8To4(color.a());

--- a/src/common/math_util.h
+++ b/src/common/math_util.h
@@ -7,7 +7,7 @@
 #include <cstdlib>
 #include <type_traits>

-namespace MathUtil {
+namespace Common {

 constexpr float PI = 3.14159265f;

@@ -41,4 +41,4 @@ struct Rectangle {
    }
 };

-} // namespace MathUtil
+} // namespace Common
--- a/src/common/quaternion.h
+++ b/src/common/quaternion.h
@@ -6,12 +6,12 @@

 #include "common/vector_math.h"

-namespace Math {
+namespace Common {

 template <typename T>
 class Quaternion {
 public:
-    Math::Vec3<T> xyz;
+    Vec3<T> xyz;
    T w{};

    Quaternion<decltype(-T{})> Inverse() const {
@@ -38,12 +38,12 @@ public:
 };

 template <typename T>
-auto QuaternionRotate(const Quaternion<T>& q, const Math::Vec3<T>& v) {
+auto QuaternionRotate(const Quaternion<T>& q, const Vec3<T>& v) {
    return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w);
 }

-inline Quaternion<float> MakeQuaternion(const Math::Vec3<float>& axis, float angle) {
+inline Quaternion<float> MakeQuaternion(const Vec3<float>& axis, float angle) {
    return {axis * std::sin(angle / 2), std::cos(angle / 2)};
 }

-} // namespace Math
+} // namespace Common
--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -28,8 +28,8 @@
 #include <cstring>
 #include "common/common_types.h"

-// GCC 4.6+
-#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
+// GCC
+#ifdef __GNUC__

 #if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN)
 #define COMMON_LITTLE_ENDIAN 1
@@ -38,7 +38,7 @@
 #endif

 // LLVM/clang
-#elif __clang__
+#elif defined(__clang__)

 #if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN)
 #define COMMON_LITTLE_ENDIAN 1
--- a/src/common/vector_math.h
+++ b/src/common/vector_math.h
@@ -33,7 +33,7 @@
 #include <cmath>
 #include <type_traits>

-namespace Math {
+namespace Common {

 template <typename T>
 class Vec2;
@@ -690,4 +690,4 @@ constexpr Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw) {
    return MakeVec(x, yzw[0], yzw[1], yzw[2]);
 }

-} // namespace Math
+} // namespace Common
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -128,7 +128,7 @@ struct System::Impl {
            return ResultStatus::ErrorVideoCore;
        }

-        gpu_core = std::make_unique<Tegra::GPU>(renderer->Rasterizer());
+        gpu_core = std::make_unique<Tegra::GPU>(system, renderer->Rasterizer());

        cpu_core_manager.Initialize(system);
        is_powered_on = true;
--- a/src/core/crypto/key_manager.cpp
+++ b/src/core/crypto/key_manager.cpp
@@ -398,7 +398,8 @@ static bool ValidCryptoRevisionString(std::string_view base, size_t begin, size_
 }

 void KeyManager::LoadFromFile(const std::string& filename, bool is_title_keys) {
-    std::ifstream file(filename);
+    std::ifstream file;
+    OpenFStream(file, filename, std::ios_base::in);
    if (!file.is_open())
        return;

--- a/src/core/file_sys/vfs_vector.cpp
+++ b/src/core/file_sys/vfs_vector.cpp
@@ -47,7 +47,7 @@ std::size_t VectorVfsFile::Write(const u8* data_, std::size_t length, std::size_
    if (offset + length > data.size())
        data.resize(offset + length);
    const auto write = std::min(length, data.size() - offset);
-    std::memcpy(data.data(), data_, write);
+    std::memcpy(data.data() + offset, data_, write);
    return write;
 }

--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -67,7 +67,7 @@ static bool IsWithinTouchscreen(const Layout::FramebufferLayout& layout, unsigne
            framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right);
 }

-std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) {
+std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) const {
    new_x = std::max(new_x, framebuffer_layout.screen.left);
    new_x = std::min(new_x, framebuffer_layout.screen.right - 1);

--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -166,7 +166,7 @@ private:
    /**
     * Clip the provided coordinates to be inside the touchscreen area.
     */
-    std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y);
+    std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y) const;
 };

 } // namespace Core::Frontend
--- a/src/core/frontend/framebuffer_layout.cpp
+++ b/src/core/frontend/framebuffer_layout.cpp
@@ -12,12 +12,12 @@ namespace Layout {

 // Finds the largest size subrectangle contained in window area that is confined to the aspect ratio
 template <class T>
-static MathUtil::Rectangle<T> maxRectangle(MathUtil::Rectangle<T> window_area,
-                                           float screen_aspect_ratio) {
+static Common::Rectangle<T> MaxRectangle(Common::Rectangle<T> window_area,
+                                         float screen_aspect_ratio) {
    float scale = std::min(static_cast<float>(window_area.GetWidth()),
                           window_area.GetHeight() / screen_aspect_ratio);
-    return MathUtil::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
-                                  static_cast<T>(std::round(scale * screen_aspect_ratio))};
+    return Common::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
+                                static_cast<T>(std::round(scale * screen_aspect_ratio))};
 }

 FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
@@ -29,8 +29,8 @@ FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {

    const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) /
                                       ScreenUndocked::Width};
-    MathUtil::Rectangle<unsigned> screen_window_area{0, 0, width, height};
-    MathUtil::Rectangle<unsigned> screen = maxRectangle(screen_window_area, emulation_aspect_ratio);
+    Common::Rectangle<unsigned> screen_window_area{0, 0, width, height};
+    Common::Rectangle<unsigned> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio);

    float window_aspect_ratio = static_cast<float>(height) / width;

--- a/src/core/frontend/framebuffer_layout.h
+++ b/src/core/frontend/framebuffer_layout.h
@@ -16,7 +16,7 @@ struct FramebufferLayout {
    unsigned width{ScreenUndocked::Width};
    unsigned height{ScreenUndocked::Height};

-    MathUtil::Rectangle<unsigned> screen;
+    Common::Rectangle<unsigned> screen;

    /**
     * Returns the ration of pixel size of the screen, compared to the native size of the undocked
--- a/src/core/frontend/input.h
+++ b/src/core/frontend/input.h
@@ -124,7 +124,7 @@ using AnalogDevice = InputDevice<std::tuple<float, float>>;
 *   Orientation is determined by right-hand rule.
 *   Units: deg/sec
 */
-using MotionDevice = InputDevice<std::tuple<Math::Vec3<float>, Math::Vec3<float>>>;
+using MotionDevice = InputDevice<std::tuple<Common::Vec3<float>, Common::Vec3<float>>>;

 /**
 * A touch device is an input device that returns a tuple of two floats and a bool. The floats are
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -14,6 +14,7 @@ constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
 constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
 constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
 constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
+constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
 constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
 constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
 constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -14,32 +14,47 @@
 namespace Kernel {
 namespace {
 constexpr u16 GetSlot(Handle handle) {
-    return handle >> 15;
+    return static_cast<u16>(handle >> 15);
 }

 constexpr u16 GetGeneration(Handle handle) {
-    return handle & 0x7FFF;
+    return static_cast<u16>(handle & 0x7FFF);
 }
 } // Anonymous namespace

 HandleTable::HandleTable() {
-    next_generation = 1;
    Clear();
 }

 HandleTable::~HandleTable() = default;

+ResultCode HandleTable::SetSize(s32 handle_table_size) {
+    if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
+        return ERR_OUT_OF_MEMORY;
+    }
+
+    // Values less than or equal to zero indicate to use the maximum allowable
+    // size for the handle table in the actual kernel, so we ignore the given
+    // value in that case, since we assume this by default unless this function
+    // is called.
+    if (handle_table_size > 0) {
+        table_size = static_cast<u16>(handle_table_size);
+    }
+
+    return RESULT_SUCCESS;
+}
+
 ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) {
    DEBUG_ASSERT(obj != nullptr);

-    u16 slot = next_free_slot;
-    if (slot >= generations.size()) {
+    const u16 slot = next_free_slot;
+    if (slot >= table_size) {
        LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use.");
        return ERR_HANDLE_TABLE_FULL;
    }
    next_free_slot = generations[slot];

-    u16 generation = next_generation++;
+    const u16 generation = next_generation++;

    // Overflow count so it fits in the 15 bits dedicated to the generation in the handle.
    // Horizon OS uses zero to represent an invalid handle, so skip to 1.
@@ -64,10 +79,11 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
 }

 ResultCode HandleTable::Close(Handle handle) {
-    if (!IsValid(handle))
+    if (!IsValid(handle)) {
        return ERR_INVALID_HANDLE;
+    }

-    u16 slot = GetSlot(handle);
+    const u16 slot = GetSlot(handle);

    objects[slot] = nullptr;

@@ -77,10 +93,10 @@ ResultCode HandleTable::Close(Handle handle) {
 }

 bool HandleTable::IsValid(Handle handle) const {
-    std::size_t slot = GetSlot(handle);
-    u16 generation = GetGeneration(handle);
+    const std::size_t slot = GetSlot(handle);
+    const u16 generation = GetGeneration(handle);

-    return slot < MAX_COUNT && objects[slot] != nullptr && generations[slot] == generation;
+    return slot < table_size && objects[slot] != nullptr && generations[slot] == generation;
 }

 SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
@@ -97,7 +113,7 @@ SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
 }

 void HandleTable::Clear() {
-    for (u16 i = 0; i < MAX_COUNT; ++i) {
+    for (u16 i = 0; i < table_size; ++i) {
        generations[i] = i + 1;
        objects[i] = nullptr;
    }
--- a/src/core/hle/kernel/handle_table.h
+++ b/src/core/hle/kernel/handle_table.h
@@ -49,6 +49,20 @@ public:
    HandleTable();
    ~HandleTable();

+    /**
+     * Sets the number of handles that may be in use at one time
+     * for this handle table.
+     *
+     * @param handle_table_size The desired size to limit the handle table to.
+     *
+     * @returns an error code indicating if initialization was successful.
+     *          If initialization was not successful, then ERR_OUT_OF_MEMORY
+     *          will be returned.
+     *
+     * @pre handle_table_size must be within the range [0, 1024]
+     */
+    ResultCode SetSize(s32 handle_table_size);
+
    /**
     * Allocates a handle for the given object.
     * @return The created Handle or one of the following errors:
@@ -103,14 +117,21 @@ private:
     */
    std::array<u16, MAX_COUNT> generations;

+    /**
+     * The limited size of the handle table. This can be specified by process
+     * capabilities in order to restrict the overall number of handles that
+     * can be created in a process instance
+     */
+    u16 table_size = static_cast<u16>(MAX_COUNT);
+
    /**
     * Global counter of the number of created handles. Stored in `generations` when a handle is
     * created, and wraps around to 1 when it hits 0x8000.
     */
-    u16 next_generation;
+    u16 next_generation = 1;

    /// Head of the free slots linked list.
-    u16 next_free_slot;
+    u16 next_free_slot = 0;
 };

 } // namespace Kernel
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -99,7 +99,13 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
    vm_manager.Reset(metadata.GetAddressSpaceType());

    const auto& caps = metadata.GetKernelCapabilities();
-    return capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
+    const auto capability_init_result =
+        capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
+    if (capability_init_result.IsError()) {
+        return capability_init_result;
+    }
+
+    return handle_table.SetSize(capabilities.GetHandleTableSize());
 }

 void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) {
--- a/src/core/hle/kernel/process_capability.cpp
+++ b/src/core/hle/kernel/process_capability.cpp
@@ -96,7 +96,7 @@ void ProcessCapabilities::InitializeForMetadatalessProcess() {
    interrupt_capabilities.set();

    // Allow using the maximum possible amount of handles
-    handle_table_size = static_cast<u32>(HandleTable::MAX_COUNT);
+    handle_table_size = static_cast<s32>(HandleTable::MAX_COUNT);

    // Allow all debugging capabilities.
    is_debuggable = true;
@@ -337,7 +337,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
        return ERR_RESERVED_VALUE;
    }

-    handle_table_size = (flags >> 16) & 0x3FF;
+    handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF);
    return RESULT_SUCCESS;
 }

--- a/src/core/hle/kernel/process_capability.h
+++ b/src/core/hle/kernel/process_capability.h
@@ -156,7 +156,7 @@ public:
    }

    /// Gets the number of total allowable handles for the process' handle table.
-    u32 GetHandleTableSize() const {
+    s32 GetHandleTableSize() const {
        return handle_table_size;
    }

@@ -252,7 +252,7 @@ private:
    u64 core_mask = 0;
    u64 priority_mask = 0;

-    u32 handle_table_size = 0;
+    s32 handle_table_size = 0;
    u32 kernel_version = 0;

    ProgramType program_type = ProgramType::SysModule;
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -262,20 +262,20 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
    LOG_DEBUG(Service_Audio, "called");

    u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
-    buffer_sz += params.unknown_c * 1024;
-    buffer_sz += 0x940 * (params.unknown_c + 1);
+    buffer_sz += params.submix_count * 1024;
+    buffer_sz += 0x940 * (params.submix_count + 1);
    buffer_sz += 0x3F0 * params.voice_count;
-    buffer_sz += Common::AlignUp(8 * (params.unknown_c + 1), 0x10);
+    buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10);
    buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
-    buffer_sz +=
-        Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) *
-                            (params.mix_buffer_count + 6),
-                        0x40);
+    buffer_sz += Common::AlignUp(
+        (0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) *
+            (params.mix_buffer_count + 6),
+        0x40);

    if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
-        u32 count = params.unknown_c + 1;
+        const u32 count = params.submix_count + 1;
        u64 node_count = Common::AlignUp(count, 0x40);
-        u64 node_state_buffer_sz =
+        const u64 node_state_buffer_sz =
            4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
        u64 edge_matrix_buffer_sz = 0;
        node_count = Common::AlignUp(count * count, 0x40);
@@ -289,19 +289,19 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {

    buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
    if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
-        buffer_sz += 0xE0 * params.unknown_2c;
+        buffer_sz += 0xE0 * params.num_splitter_send_channels;
        buffer_sz += 0x20 * params.splitter_count;
-        buffer_sz += Common::AlignUp(4 * params.unknown_2c, 0x10);
+        buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10);
    }
    buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
    u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
                    ((params.voice_count * 256) | 0x40);

-    if (params.unknown_1c >= 1) {
+    if (params.performance_frame_count >= 1) {
        output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
                                      16 * params.voice_count + 16) +
                                     0x658) *
-                                            (params.unknown_1c + 1) +
+                                            (params.performance_frame_count + 1) +
                                        0xc0,
                                    0x40) +
                    output_sz;
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -15,7 +15,7 @@ namespace Kernel {
 class SharedMemory;
 }

-namespace SM {
+namespace Service::SM {
 class ServiceManager;
 }

--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -23,7 +23,7 @@ u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector

 void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
                        u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
-                        const MathUtil::Rectangle<int>& crop_rect) {
+                        const Common::Rectangle<int>& crop_rect) {
    VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
    LOG_TRACE(Service,
              "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -25,7 +25,7 @@ public:
    /// Performs a screen flip, drawing the buffer pointed to by the handle.
    void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
              NVFlinger::BufferQueue::BufferTransformFlags transform,
-              const MathUtil::Rectangle<int>& crop_rect);
+              const Common::Rectangle<int>& crop_rect);

 private:
    std::shared_ptr<nvmap> nvmap_dev;
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -63,7 +63,7 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
 }

 void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
-                              const MathUtil::Rectangle<int>& crop_rect) {
+                              const Common::Rectangle<int>& crop_rect) {
    auto itr = std::find_if(queue.begin(), queue.end(),
                            [&](const Buffer& buffer) { return buffer.slot == slot; });
    ASSERT(itr != queue.end());
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -67,14 +67,14 @@ public:
        Status status = Status::Free;
        IGBPBuffer igbp_buffer;
        BufferTransformFlags transform;
-        MathUtil::Rectangle<int> crop_rect;
+        Common::Rectangle<int> crop_rect;
    };

    void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer);
    std::optional<u32> DequeueBuffer(u32 width, u32 height);
    const IGBPBuffer& RequestBuffer(u32 slot) const;
    void QueueBuffer(u32 slot, BufferTransformFlags transform,
-                     const MathUtil::Rectangle<int>& crop_rect);
+                     const Common::Rectangle<int>& crop_rect);
    std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
    void ReleaseBuffer(u32 slot);
    u32 Query(QueryType type);
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -28,9 +28,13 @@ namespace Service::NVFlinger {
 constexpr std::size_t SCREEN_REFRESH_RATE = 60;
 constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);

-NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing)
-    : displays{{0, "Default"}, {1, "External"}, {2, "Edid"}, {3, "Internal"}, {4, "Null"}},
-      core_timing{core_timing} {
+NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
+    displays.emplace_back(0, "Default");
+    displays.emplace_back(1, "External");
+    displays.emplace_back(2, "Edid");
+    displays.emplace_back(3, "Internal");
+    displays.emplace_back(4, "Null");
+
    // Schedule the screen composition events
    composition_event =
        core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
@@ -55,13 +59,14 @@ std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
    // TODO(Subv): Currently we only support the Default display.
    ASSERT(name == "Default");

-    const auto itr = std::find_if(displays.begin(), displays.end(),
-                                  [&](const VI::Display& display) { return display.name == name; });
+    const auto itr =
+        std::find_if(displays.begin(), displays.end(),
+                     [&](const VI::Display& display) { return display.GetName() == name; });
    if (itr == displays.end()) {
        return {};
    }

-    return itr->id;
+    return itr->GetID();
 }

 std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
@@ -71,13 +76,10 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
        return {};
    }

-    ASSERT_MSG(display->layers.empty(), "Only one layer is supported per display at the moment");
-
    const u64 layer_id = next_layer_id++;
    const u32 buffer_queue_id = next_buffer_queue_id++;
-    auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id);
-    display->layers.emplace_back(layer_id, buffer_queue);
-    buffer_queues.emplace_back(std::move(buffer_queue));
+    buffer_queues.emplace_back(buffer_queue_id, layer_id);
+    display->CreateLayer(layer_id, buffer_queues.back());
    return layer_id;
 }

@@ -88,7 +90,7 @@ std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) co
        return {};
    }

-    return layer->buffer_queue->GetId();
+    return layer->GetBufferQueue().GetId();
 }

 Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const {
@@ -98,12 +100,20 @@ Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_i
        return nullptr;
    }

-    return display->vsync_event.readable;
+    return display->GetVSyncEvent();
 }

-std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const {
+BufferQueue& NVFlinger::FindBufferQueue(u32 id) {
    const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
-                                  [&](const auto& queue) { return queue->GetId() == id; });
+                                  [id](const auto& queue) { return queue.GetId() == id; });
+
+    ASSERT(itr != buffer_queues.end());
+    return *itr;
+}
+
+const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const {
+    const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
+                                  [id](const auto& queue) { return queue.GetId() == id; });

    ASSERT(itr != buffer_queues.end());
    return *itr;
@@ -112,7 +122,7 @@ std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const {
 VI::Display* NVFlinger::FindDisplay(u64 display_id) {
    const auto itr =
        std::find_if(displays.begin(), displays.end(),
-                     [&](const VI::Display& display) { return display.id == display_id; });
+                     [&](const VI::Display& display) { return display.GetID() == display_id; });

    if (itr == displays.end()) {
        return nullptr;
@@ -124,7 +134,7 @@ VI::Display* NVFlinger::FindDisplay(u64 display_id) {
 const VI::Display* NVFlinger::FindDisplay(u64 display_id) const {
    const auto itr =
        std::find_if(displays.begin(), displays.end(),
-                     [&](const VI::Display& display) { return display.id == display_id; });
+                     [&](const VI::Display& display) { return display.GetID() == display_id; });

    if (itr == displays.end()) {
        return nullptr;
@@ -140,14 +150,7 @@ VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
        return nullptr;
    }

-    const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
-                                  [&](const VI::Layer& layer) { return layer.id == layer_id; });
-
-    if (itr == display->layers.end()) {
-        return nullptr;
-    }
-
-    return &*itr;
+    return display->FindLayer(layer_id);
 }

 const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
@@ -157,33 +160,24 @@ const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
        return nullptr;
    }

-    const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
-                                  [&](const VI::Layer& layer) { return layer.id == layer_id; });
-
-    if (itr == display->layers.end()) {
-        return nullptr;
-    }
-
-    return &*itr;
+    return display->FindLayer(layer_id);
 }

 void NVFlinger::Compose() {
    for (auto& display : displays) {
        // Trigger vsync for this display at the end of drawing
-        SCOPE_EXIT({ display.vsync_event.writable->Signal(); });
+        SCOPE_EXIT({ display.SignalVSyncEvent(); });

        // Don't do anything for displays without layers.
-        if (display.layers.empty())
+        if (!display.HasLayers())
            continue;

        // TODO(Subv): Support more than 1 layer.
-        ASSERT_MSG(display.layers.size() == 1, "Max 1 layer per display is supported");
-
-        VI::Layer& layer = display.layers[0];
-        auto& buffer_queue = layer.buffer_queue;
+        VI::Layer& layer = display.GetLayer(0);
+        auto& buffer_queue = layer.GetBufferQueue();

        // Search for a queued buffer and acquire it
-        auto buffer = buffer_queue->AcquireBuffer();
+        auto buffer = buffer_queue.AcquireBuffer();

        MicroProfileFlip();

@@ -208,7 +202,7 @@ void NVFlinger::Compose() {
                     igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
                     buffer->get().transform, buffer->get().crop_rect);

-        buffer_queue->ReleaseBuffer(buffer->get().slot);
+        buffer_queue.ReleaseBuffer(buffer->get().slot);
    }
 }

--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -28,8 +28,8 @@ class Module;
 } // namespace Service::Nvidia

 namespace Service::VI {
-struct Display;
-struct Layer;
+class Display;
+class Layer;
 } // namespace Service::VI

 namespace Service::NVFlinger {
@@ -65,7 +65,10 @@ public:
    Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const;

    /// Obtains a buffer queue identified by the ID.
-    std::shared_ptr<BufferQueue> FindBufferQueue(u32 id) const;
+    BufferQueue& FindBufferQueue(u32 id);
+
+    /// Obtains a buffer queue identified by the ID.
+    const BufferQueue& FindBufferQueue(u32 id) const;

    /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
    /// finished.
@@ -87,7 +90,7 @@ private:
    std::shared_ptr<Nvidia::Module> nvdrv;

    std::vector<VI::Display> displays;
-    std::vector<std::shared_ptr<BufferQueue>> buffer_queues;
+    std::vector<BufferQueue> buffer_queues;

    /// Id to use for the next layer that is created, this counter is shared among all displays.
    u64 next_layer_id = 1;
--- a/src/core/hle/service/vi/display/vi_display.cpp
+++ b/src/core/hle/service/vi/display/vi_display.cpp
@@ -2,8 +2,12 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include <algorithm>
+#include <utility>
+
 #include <fmt/format.h>

+#include "common/assert.h"
 #include "core/core.h"
 #include "core/hle/kernel/readable_event.h"
 #include "core/hle/service/vi/display/vi_display.h"
@@ -19,4 +23,49 @@ Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} {

 Display::~Display() = default;

+Layer& Display::GetLayer(std::size_t index) {
+    return layers.at(index);
+}
+
+const Layer& Display::GetLayer(std::size_t index) const {
+    return layers.at(index);
+}
+
+Kernel::SharedPtr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
+    return vsync_event.readable;
+}
+
+void Display::SignalVSyncEvent() {
+    vsync_event.writable->Signal();
+}
+
+void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
+    // TODO(Subv): Support more than 1 layer.
+    ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
+
+    layers.emplace_back(id, buffer_queue);
+}
+
+Layer* Display::FindLayer(u64 id) {
+    const auto itr = std::find_if(layers.begin(), layers.end(),
+                                  [id](const VI::Layer& layer) { return layer.GetID() == id; });
+
+    if (itr == layers.end()) {
+        return nullptr;
+    }
+
+    return &*itr;
+}
+
+const Layer* Display::FindLayer(u64 id) const {
+    const auto itr = std::find_if(layers.begin(), layers.end(),
+                                  [id](const VI::Layer& layer) { return layer.GetID() == id; });
+
+    if (itr == layers.end()) {
+        return nullptr;
+    }
+
+    return &*itr;
+}
+
 } // namespace Service::VI
--- a/src/core/hle/service/vi/display/vi_display.h
+++ b/src/core/hle/service/vi/display/vi_display.h
@@ -10,14 +10,84 @@
 #include "common/common_types.h"
 #include "core/hle/kernel/writable_event.h"

+namespace Service::NVFlinger {
+class BufferQueue;
+}
+
 namespace Service::VI {

-struct Layer;
+class Layer;

-struct Display {
+/// Represents a single display type
+class Display {
+public:
+    /// Constructs a display with a given unique ID and name.
+    ///
+    /// @param id   The unique ID for this display.
+    /// @param name The name for this display.
+    ///
    Display(u64 id, std::string name);
    ~Display();

+    Display(const Display&) = delete;
+    Display& operator=(const Display&) = delete;
+
+    Display(Display&&) = default;
+    Display& operator=(Display&&) = default;
+
+    /// Gets the unique ID assigned to this display.
+    u64 GetID() const {
+        return id;
+    }
+
+    /// Gets the name of this display
+    const std::string& GetName() const {
+        return name;
+    }
+
+    /// Whether or not this display has any layers added to it.
+    bool HasLayers() const {
+        return !layers.empty();
+    }
+
+    /// Gets a layer for this display based off an index.
+    Layer& GetLayer(std::size_t index);
+
+    /// Gets a layer for this display based off an index.
+    const Layer& GetLayer(std::size_t index) const;
+
+    /// Gets the readable vsync event.
+    Kernel::SharedPtr<Kernel::ReadableEvent> GetVSyncEvent() const;
+
+    /// Signals the internal vsync event.
+    void SignalVSyncEvent();
+
+    /// Creates and adds a layer to this display with the given ID.
+    ///
+    /// @param id           The ID to assign to the created layer.
+    /// @param buffer_queue The buffer queue for the layer instance to use.
+    ///
+    void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);
+
+    /// Attempts to find a layer with the given ID.
+    ///
+    /// @param id The layer ID.
+    ///
+    /// @returns If found, the Layer instance with the given ID.
+    ///          If not found, then nullptr is returned.
+    ///
+    Layer* FindLayer(u64 id);
+
+    /// Attempts to find a layer with the given ID.
+    ///
+    /// @param id The layer ID.
+    ///
+    /// @returns If found, the Layer instance with the given ID.
+    ///          If not found, then nullptr is returned.
+    ///
+    const Layer* FindLayer(u64 id) const;
+
+private:
    u64 id;
    std::string name;

--- a/src/core/hle/service/vi/layer/vi_layer.cpp
+++ b/src/core/hle/service/vi/layer/vi_layer.cpp
@@ -6,8 +6,7 @@

 namespace Service::VI {

-Layer::Layer(u64 id, std::shared_ptr<NVFlinger::BufferQueue> queue)
-    : id{id}, buffer_queue{std::move(queue)} {}
+Layer::Layer(u64 id, NVFlinger::BufferQueue& queue) : id{id}, buffer_queue{queue} {}

 Layer::~Layer() = default;

--- a/src/core/hle/service/vi/layer/vi_layer.h
+++ b/src/core/hle/service/vi/layer/vi_layer.h
@@ -4,8 +4,6 @@

 #pragma once

-#include <memory>
-
 #include "common/common_types.h"

 namespace Service::NVFlinger {
@@ -14,12 +12,41 @@ class BufferQueue;

 namespace Service::VI {

-struct Layer {
-    Layer(u64 id, std::shared_ptr<NVFlinger::BufferQueue> queue);
+/// Represents a single display layer.
+class Layer {
+public:
+    /// Constructs a layer with a given ID and buffer queue.
+    ///
+    /// @param id    The ID to assign to this layer.
+    /// @param queue The buffer queue for this layer to use.
+    ///
+    Layer(u64 id, NVFlinger::BufferQueue& queue);
    ~Layer();

+    Layer(const Layer&) = delete;
+    Layer& operator=(const Layer&) = delete;
+
+    Layer(Layer&&) = default;
+    Layer& operator=(Layer&&) = delete;
+
+    /// Gets the ID for this layer.
+    u64 GetID() const {
+        return id;
+    }
+
+    /// Gets a reference to the buffer queue this layer is using.
+    NVFlinger::BufferQueue& GetBufferQueue() {
+        return buffer_queue;
+    }
+
+    /// Gets a const reference to the buffer queue this layer is using.
+    const NVFlinger::BufferQueue& GetBufferQueue() const {
+        return buffer_queue;
+    }
+
+private:
    u64 id;
-    std::shared_ptr<NVFlinger::BufferQueue> buffer_queue;
+    NVFlinger::BufferQueue& buffer_queue;
 };

 } // namespace Service::VI
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -420,7 +420,7 @@ public:
        u32_le fence_is_valid;
        std::array<Fence, 2> fences;

-        MathUtil::Rectangle<int> GetCropRect() const {
+        Common::Rectangle<int> GetCropRect() const {
            return {crop_left, crop_top, crop_right, crop_bottom};
        }
    };
@@ -525,7 +525,7 @@ private:
        LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
                  static_cast<u32>(transaction), flags);

-        auto buffer_queue = nv_flinger->FindBufferQueue(id);
+        auto& buffer_queue = nv_flinger->FindBufferQueue(id);

        if (transaction == TransactionId::Connect) {
            IGBPConnectRequestParcel request{ctx.ReadBuffer()};
@@ -538,7 +538,7 @@ private:
        } else if (transaction == TransactionId::SetPreallocatedBuffer) {
            IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()};

-            buffer_queue->SetPreallocatedBuffer(request.data.slot, request.buffer);
+            buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer);

            IGBPSetPreallocatedBufferResponseParcel response{};
            ctx.WriteBuffer(response.Serialize());
@@ -546,7 +546,7 @@ private:
            IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
            const u32 width{request.data.width};
            const u32 height{request.data.height};
-            std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
+            std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);

            if (slot) {
                // Buffer is available
@@ -559,8 +559,8 @@ private:
                    [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
                        Kernel::ThreadWakeupReason reason) {
                        // Repeat TransactParcel DequeueBuffer when a buffer is available
-                        auto buffer_queue = nv_flinger->FindBufferQueue(id);
-                        std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
+                        auto& buffer_queue = nv_flinger->FindBufferQueue(id);
+                        std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
                        ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer.");

                        IGBPDequeueBufferResponseParcel response{*slot};
@@ -568,28 +568,28 @@ private:
                        IPC::ResponseBuilder rb{ctx, 2};
                        rb.Push(RESULT_SUCCESS);
                    },
-                    buffer_queue->GetWritableBufferWaitEvent());
+                    buffer_queue.GetWritableBufferWaitEvent());
            }
        } else if (transaction == TransactionId::RequestBuffer) {
            IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};

-            auto& buffer = buffer_queue->RequestBuffer(request.slot);
+            auto& buffer = buffer_queue.RequestBuffer(request.slot);

            IGBPRequestBufferResponseParcel response{buffer};
            ctx.WriteBuffer(response.Serialize());
        } else if (transaction == TransactionId::QueueBuffer) {
            IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};

-            buffer_queue->QueueBuffer(request.data.slot, request.data.transform,
-                                      request.data.GetCropRect());
+            buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
+                                     request.data.GetCropRect());

            IGBPQueueBufferResponseParcel response{1280, 720};
            ctx.WriteBuffer(response.Serialize());
        } else if (transaction == TransactionId::Query) {
            IGBPQueryRequestParcel request{ctx.ReadBuffer()};

-            u32 value =
-                buffer_queue->Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));
+            const u32 value =
+                buffer_queue.Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));

            IGBPQueryResponseParcel response{value};
            ctx.WriteBuffer(response.Serialize());
@@ -629,12 +629,12 @@ private:

        LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown);

-        const auto buffer_queue = nv_flinger->FindBufferQueue(id);
+        const auto& buffer_queue = nv_flinger->FindBufferQueue(id);

        // TODO(Subv): Find out what this actually is.
        IPC::ResponseBuilder rb{ctx, 2, 1};
        rb.Push(RESULT_SUCCESS);
-        rb.PushCopyObjects(buffer_queue->GetBufferWaitEvent());
+        rb.PushCopyObjects(buffer_queue.GetBufferWaitEvent());
    }

    std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
@@ -752,6 +752,7 @@ public:
            {1102, nullptr, "GetDisplayResolution"},
            {2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"},
            {2011, nullptr, "DestroyManagedLayer"},
+            {2012, nullptr, "CreateStrayLayer"},
            {2050, nullptr, "CreateIndirectLayer"},
            {2051, nullptr, "DestroyIndirectLayer"},
            {2052, nullptr, "CreateIndirectProducerEndPoint"},
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -71,15 +71,20 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa
                                 FlushMode::FlushAndInvalidate);

    VAddr end = base + size;
-    while (base != end) {
-        ASSERT_MSG(base < page_table.pointers.size(), "out of range mapping at {:016X}", base);
+    ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
+               base + page_table.pointers.size());

-        page_table.attributes[base] = type;
-        page_table.pointers[base] = memory;
+    std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);

-        base += 1;
-        if (memory != nullptr)
+    if (memory == nullptr) {
+        std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
+    } else {
+        while (base != end) {
+            page_table.pointers[base] = memory;
+
+            base += 1;
            memory += PAGE_SIZE;
+        }
    }
 }

--- a/src/input_common/motion_emu.cpp
+++ b/src/input_common/motion_emu.cpp
@@ -32,12 +32,12 @@ public:
    }

    void BeginTilt(int x, int y) {
-        mouse_origin = Math::MakeVec(x, y);
+        mouse_origin = Common::MakeVec(x, y);
        is_tilting = true;
    }

    void Tilt(int x, int y) {
-        auto mouse_move = Math::MakeVec(x, y) - mouse_origin;
+        auto mouse_move = Common::MakeVec(x, y) - mouse_origin;
        if (is_tilting) {
            std::lock_guard<std::mutex> guard(tilt_mutex);
            if (mouse_move.x == 0 && mouse_move.y == 0) {
@@ -45,7 +45,7 @@ public:
            } else {
                tilt_direction = mouse_move.Cast<float>();
                tilt_angle =
-                    std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, MathUtil::PI * 0.5f);
+                    std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, Common::PI * 0.5f);
            }
        }
    }
@@ -56,7 +56,7 @@ public:
        is_tilting = false;
    }

-    std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() {
+    std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() {
        std::lock_guard<std::mutex> guard(status_mutex);
        return status;
    }
@@ -66,17 +66,17 @@ private:
    const std::chrono::steady_clock::duration update_duration;
    const float sensitivity;

-    Math::Vec2<int> mouse_origin;
+    Common::Vec2<int> mouse_origin;

    std::mutex tilt_mutex;
-    Math::Vec2<float> tilt_direction;
+    Common::Vec2<float> tilt_direction;
    float tilt_angle = 0;

    bool is_tilting = false;

    Common::Event shutdown_event;

-    std::tuple<Math::Vec3<float>, Math::Vec3<float>> status;
+    std::tuple<Common::Vec3<float>, Common::Vec3<float>> status;
    std::mutex status_mutex;

    // Note: always keep the thread declaration at the end so that other objects are initialized
@@ -85,8 +85,8 @@ private:

    void MotionEmuThread() {
        auto update_time = std::chrono::steady_clock::now();
-        Math::Quaternion<float> q = MakeQuaternion(Math::Vec3<float>(), 0);
-        Math::Quaternion<float> old_q;
+        Common::Quaternion<float> q = Common::MakeQuaternion(Common::Vec3<float>(), 0);
+        Common::Quaternion<float> old_q;

        while (!shutdown_event.WaitUntil(update_time)) {
            update_time += update_duration;
@@ -96,18 +96,18 @@ private:
                std::lock_guard<std::mutex> guard(tilt_mutex);

                // Find the quaternion describing current 3DS tilting
-                q = MakeQuaternion(Math::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x),
-                                   tilt_angle);
+                q = Common::MakeQuaternion(
+                    Common::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), tilt_angle);
            }

            auto inv_q = q.Inverse();

            // Set the gravity vector in world space
-            auto gravity = Math::MakeVec(0.0f, -1.0f, 0.0f);
+            auto gravity = Common::MakeVec(0.0f, -1.0f, 0.0f);

            // Find the angular rate vector in world space
            auto angular_rate = ((q - old_q) * inv_q).xyz * 2;
-            angular_rate *= 1000 / update_millisecond / MathUtil::PI * 180;
+            angular_rate *= 1000 / update_millisecond / Common::PI * 180;

            // Transform the two vectors from world space to 3DS space
            gravity = QuaternionRotate(inv_q, gravity);
@@ -131,7 +131,7 @@ public:
        device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity);
    }

-    std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() const override {
+    std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const override {
        return device->GetStatus();
    }

--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -106,8 +106,14 @@ if (ENABLE_VULKAN)
        renderer_vulkan/declarations.h
        renderer_vulkan/vk_device.cpp
        renderer_vulkan/vk_device.h
+        renderer_vulkan/vk_memory_manager.cpp
+        renderer_vulkan/vk_memory_manager.h
        renderer_vulkan/vk_resource_manager.cpp
-        renderer_vulkan/vk_resource_manager.h)
+        renderer_vulkan/vk_resource_manager.h
+        renderer_vulkan/vk_scheduler.cpp
+        renderer_vulkan/vk_scheduler.h
+        renderer_vulkan/vk_stream_buffer.cpp
+        renderer_vulkan/vk_stream_buffer.h)

    target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
    target_compile_definitions(video_core PRIVATE HAS_VULKAN)
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -33,18 +33,36 @@ void DmaPusher::DispatchCalls() {
 }

 bool DmaPusher::Step() {
-    if (dma_get != dma_put) {
-        // Push buffer non-empty, read a word
-        const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
-        ASSERT_MSG(address, "Invalid GPU address");
+    if (!ib_enable || dma_pushbuffer.empty()) {
+        // pushbuffer empty and IB empty or nonexistent - nothing to do
+        return false;
+    }

-        const CommandHeader command_header{Memory::Read32(*address)};
+    const CommandList& command_list{dma_pushbuffer.front()};
+    const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
+    GPUVAddr dma_get = command_list_header.addr;
+    GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
+    bool non_main = command_list_header.is_non_main;

-        dma_get += sizeof(u32);
+    if (dma_pushbuffer_subindex >= command_list.size()) {
+        // We've gone through the current list, remove it from the queue
+        dma_pushbuffer.pop();
+        dma_pushbuffer_subindex = 0;
+    }

-        if (!non_main) {
-            dma_mget = dma_get;
-        }
+    if (command_list_header.size == 0) {
+        return true;
+    }
+
+    // Push buffer non-empty, read a word
+    const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
+    ASSERT_MSG(address, "Invalid GPU address");
+
+    command_headers.resize(command_list_header.size);
+
+    Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32));
+
+    for (const CommandHeader& command_header : command_headers) {

        // now, see if we're in the middle of a command
        if (dma_state.length_pending) {
@@ -91,22 +109,11 @@ bool DmaPusher::Step() {
                break;
            }
        }
-    } else if (ib_enable && !dma_pushbuffer.empty()) {
-        // Current pushbuffer empty, but we have more IB entries to read
-        const CommandList& command_list{dma_pushbuffer.front()};
-        const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
-        dma_get = command_list_header.addr;
-        dma_put = dma_get + command_list_header.size * sizeof(u32);
-        non_main = command_list_header.is_non_main;
+    }

-        if (dma_pushbuffer_subindex >= command_list.size()) {
-            // We've gone through the current list, remove it from the queue
-            dma_pushbuffer.pop();
-            dma_pushbuffer_subindex = 0;
-        }
-    } else {
-        // Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
-        return {};
+    if (!non_main) {
+        // TODO (degasus): This is dead code, as dma_mget is never read.
+        dma_mget = dma_put;
    }

    return true;
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -75,6 +75,8 @@ private:

    GPU& gpu;

+    std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once
+
    std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
    std::size_t dma_pushbuffer_subindex{};  ///< Index within a command list within the pushbuffer

@@ -89,11 +91,8 @@ private:
    DmaState dma_state{};
    bool dma_increment_once{};

-    GPUVAddr dma_put{};   ///< pushbuffer current end address
-    GPUVAddr dma_get{};   ///< pushbuffer current read address
    GPUVAddr dma_mget{};  ///< main pushbuffer last read address
    bool ib_enable{true}; ///< IB mode enabled
-    bool non_main{};      ///< non-main pushbuffer active
 };

 } // namespace Tegra
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -44,10 +44,10 @@ void Fermi2D::HandleSurfaceCopy() {
    const u32 src_blit_y2{
        static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)};

-    const MathUtil::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
-    const MathUtil::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
-                                            regs.blit_dst_x + regs.blit_dst_width,
-                                            regs.blit_dst_y + regs.blit_dst_height};
+    const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
+    const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
+                                          regs.blit_dst_x + regs.blit_dst_width,
+                                          regs.blit_dst_y + regs.blit_dst_height};

    if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) {
        UNIMPLEMENTED();
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
 #include "core/memory.h"
@@ -11,9 +12,9 @@

 namespace Tegra::Engines {

-KeplerMemory::KeplerMemory(VideoCore::RasterizerInterface& rasterizer,
+KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                           MemoryManager& memory_manager)
-    : memory_manager(memory_manager), rasterizer{rasterizer} {}
+    : system{system}, memory_manager(memory_manager), rasterizer{rasterizer} {}

 KeplerMemory::~KeplerMemory() = default;

@@ -50,7 +51,7 @@ void KeplerMemory::ProcessData(u32 data) {
    rasterizer.InvalidateRegion(*dest_address, sizeof(u32));

    Memory::Write32(*dest_address, data);
-    Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+    system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();

    state.write_offset++;
 }
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -5,13 +5,16 @@
 #pragma once

 #include <array>
-#include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"

+namespace Core {
+class System;
+}
+
 namespace VideoCore {
 class RasterizerInterface;
 }
@@ -23,7 +26,8 @@ namespace Tegra::Engines {

 class KeplerMemory final {
 public:
-    KeplerMemory(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
+    KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                 MemoryManager& memory_manager);
    ~KeplerMemory();

    /// Write the value to the register identified by method.
@@ -76,6 +80,7 @@ public:
    } state{};

 private:
+    Core::System& system;
    MemoryManager& memory_manager;
    VideoCore::RasterizerInterface& rasterizer;

--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -19,8 +19,10 @@ namespace Tegra::Engines {
 /// First register id that is actually a Macro call.
 constexpr u32 MacroRegistersStart = 0xE00;

-Maxwell3D::Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
-    : memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) {
+Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                     MemoryManager& memory_manager)
+    : memory_manager(memory_manager), system{system}, rasterizer{rasterizer},
+      macro_interpreter(*this) {
    InitializeRegisterDefaults();
 }

@@ -103,23 +105,25 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
 }

 void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
-    auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
+    auto debug_context = system.GetGPUDebugContext();
+
+    const u32 method = method_call.method;

    // It is an error to write to a register other than the current macro's ARG register before it
    // has finished execution.
    if (executing_macro != 0) {
-        ASSERT(method_call.method == executing_macro + 1);
+        ASSERT(method == executing_macro + 1);
    }

    // Methods after 0xE00 are special, they're actually triggers for some microcode that was
    // uploaded to the GPU during initialization.
-    if (method_call.method >= MacroRegistersStart) {
+    if (method >= MacroRegistersStart) {
        // We're trying to execute a macro
        if (executing_macro == 0) {
            // A macro call must begin by writing the macro method's register, not its argument.
-            ASSERT_MSG((method_call.method % 2) == 0,
+            ASSERT_MSG((method % 2) == 0,
                       "Can't start macro execution by writing to the ARGS register");
-            executing_macro = method_call.method;
+            executing_macro = method;
        }

        macro_params.push_back(method_call.argument);
@@ -131,66 +135,62 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
        return;
    }

-    ASSERT_MSG(method_call.method < Regs::NUM_REGS,
+    ASSERT_MSG(method < Regs::NUM_REGS,
               "Invalid Maxwell3D register, increase the size of the Regs structure");

    if (debug_context) {
        debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
    }

-    if (regs.reg_array[method_call.method] != method_call.argument) {
-        regs.reg_array[method_call.method] = method_call.argument;
+    if (regs.reg_array[method] != method_call.argument) {
+        regs.reg_array[method] = method_call.argument;
        // Color buffers
        constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
        constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
-        if (method_call.method >= first_rt_reg &&
-            method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
-            const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt;
-            dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index);
+        if (method >= first_rt_reg &&
+            method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
+            const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
+            dirty_flags.color_buffer.set(rt_index);
        }

        // Zeta buffer
        constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
-        if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) ||
-            method_call.method == MAXWELL3D_REG_INDEX(zeta_width) ||
-            method_call.method == MAXWELL3D_REG_INDEX(zeta_height) ||
-            (method_call.method >= MAXWELL3D_REG_INDEX(zeta) &&
-             method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
+        if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
+            method == MAXWELL3D_REG_INDEX(zeta_width) ||
+            method == MAXWELL3D_REG_INDEX(zeta_height) ||
+            (method >= MAXWELL3D_REG_INDEX(zeta) &&
+             method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
            dirty_flags.zeta_buffer = true;
        }

        // Shader
        constexpr u32 shader_registers_count =
            sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
-        if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
-            method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
+        if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
+            method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
            dirty_flags.shaders = true;
        }

        // Vertex format
-        if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
-            method_call.method <
-                MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
+        if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
+            method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
            dirty_flags.vertex_attrib_format = true;
        }

        // Vertex buffer
-        if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) &&
-            method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
-            dirty_flags.vertex_array |=
-                1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
-        } else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
-                   method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
-            dirty_flags.vertex_array |=
-                1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
-        } else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
-                   method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
-            dirty_flags.vertex_array |=
-                1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays));
+        if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
+            method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
+            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
+        } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
+                   method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
+            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
+        } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
+                   method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
+            dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
        }
    }

-    switch (method_call.method) {
+    switch (method) {
    case MAXWELL3D_REG_INDEX(macros.data): {
        ProcessMacroUpload(method_call.argument);
        break;
@@ -317,7 +317,7 @@ void Maxwell3D::ProcessQueryGet() {
            LongQueryResult query_result{};
            query_result.value = result;
            // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
-            query_result.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
+            query_result.timestamp = system.CoreTiming().GetTicks();
            Memory::WriteBlock(*address, &query_result, sizeof(query_result));
        }
        dirty_flags.OnMemoryWrite();
@@ -334,7 +334,7 @@ void Maxwell3D::DrawArrays() {
              regs.vertex_buffer.count);
    ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");

-    auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
+    auto debug_context = system.GetGPUDebugContext();

    if (debug_context) {
        debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -5,8 +5,10 @@
 #pragma once

 #include <array>
+#include <bitset>
 #include <unordered_map>
 #include <vector>
+
 #include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
@@ -17,6 +19,10 @@
 #include "video_core/memory_manager.h"
 #include "video_core/textures/texture.h"

+namespace Core {
+class System;
+}
+
 namespace VideoCore {
 class RasterizerInterface;
 }
@@ -28,7 +34,8 @@ namespace Tegra::Engines {

 class Maxwell3D final {
 public:
-    explicit Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
+    explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                       MemoryManager& memory_manager);
    ~Maxwell3D() = default;

    /// Register structure of the Maxwell3D engine.
@@ -498,7 +505,7 @@ public:
            f32 translate_z;
            INSERT_PADDING_WORDS(2);

-            MathUtil::Rectangle<s32> GetRect() const {
+            Common::Rectangle<s32> GetRect() const {
                return {
                    GetX(),               // left
                    GetY() + GetHeight(), // top
@@ -1089,19 +1096,18 @@ public:
    MemoryManager& memory_manager;

    struct DirtyFlags {
-        u8 color_buffer = 0xFF;
-        bool zeta_buffer = true;
-
-        bool shaders = true;
+        std::bitset<8> color_buffer{0xFF};
+        std::bitset<32> vertex_array{0xFFFFFFFF};

        bool vertex_attrib_format = true;
-        u32 vertex_array = 0xFFFFFFFF;
+        bool zeta_buffer = true;
+        bool shaders = true;

        void OnMemoryWrite() {
-            color_buffer = 0xFF;
            zeta_buffer = true;
            shaders = true;
-            vertex_array = 0xFFFFFFFF;
+            color_buffer.set();
+            vertex_array.set();
        }
    };

@@ -1131,6 +1137,8 @@ public:
 private:
    void InitializeRegisterDefaults();

+    Core::System& system;
+
    VideoCore::RasterizerInterface& rasterizer;

    /// Start offsets of each macro in macro_memory
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include "common/assert.h"
 #include "core/core.h"
 #include "core/memory.h"
 #include "video_core/engines/maxwell_3d.h"
@@ -11,8 +12,9 @@

 namespace Tegra::Engines {

-MaxwellDMA::MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
-    : memory_manager(memory_manager), rasterizer{rasterizer} {}
+MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                       MemoryManager& memory_manager)
+    : memory_manager(memory_manager), system{system}, rasterizer{rasterizer} {}

 void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
    ASSERT_MSG(method_call.method < Regs::NUM_REGS,
@@ -59,7 +61,7 @@ void MaxwellDMA::HandleCopy() {
    }

    // All copies here update the main memory, so mark all rasterizer states as invalid.
-    Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+    system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();

    if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
        // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -5,13 +5,16 @@
 #pragma once

 #include <array>
-#include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"

+namespace Core {
+class System;
+}
+
 namespace VideoCore {
 class RasterizerInterface;
 }
@@ -20,7 +23,8 @@ namespace Tegra::Engines {

 class MaxwellDMA final {
 public:
-    explicit MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
+    explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+                        MemoryManager& memory_manager);
    ~MaxwellDMA() = default;

    /// Write the value to the register identified by method.
@@ -137,6 +141,8 @@ public:
    MemoryManager& memory_manager;

 private:
+    Core::System& system;
+
    VideoCore::RasterizerInterface& rasterizer;

    /// Performs the copy from the source buffer to the destination buffer as configured in the
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -376,9 +376,9 @@ enum class R2pMode : u64 {
 };

 enum class IpaInterpMode : u64 {
-    Linear = 0,
-    Perspective = 1,
-    Flat = 2,
+    Pass = 0,
+    Multiply = 1,
+    Constant = 2,
    Sc = 3,
 };

--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -16,6 +16,13 @@ enum class OutputTopology : u32 {
    TriangleStrip = 7,
 };

+enum class AttributeUse : u8 {
+    Unused = 0,
+    Constant = 1,
+    Perspective = 2,
+    ScreenLinear = 3,
+};
+
 // Documentation in:
 // http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
 struct Header {
@@ -84,9 +91,15 @@ struct Header {
        } vtg;

        struct {
-            INSERT_PADDING_BYTES(3);  // ImapSystemValuesA
-            INSERT_PADDING_BYTES(1);  // ImapSystemValuesB
-            INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
+            INSERT_PADDING_BYTES(3); // ImapSystemValuesA
+            INSERT_PADDING_BYTES(1); // ImapSystemValuesB
+            union {
+                BitField<0, 2, AttributeUse> x;
+                BitField<2, 2, AttributeUse> y;
+                BitField<4, 2, AttributeUse> w;
+                BitField<6, 2, AttributeUse> z;
+                u8 raw;
+            } imap_generic_vector[32];
            INSERT_PADDING_BYTES(2);  // ImapColor
            INSERT_PADDING_BYTES(2);  // ImapSystemValuesC
            INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
@@ -103,6 +116,28 @@ struct Header {
                const u32 bit = render_target * 4 + component;
                return omap.target & (1 << bit);
            }
+            AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
+                return static_cast<AttributeUse>(
+                    (imap_generic_vector[attribute].raw >> (index * 2)) & 0x03);
+            }
+            AttributeUse GetAttributeUse(u32 attribute) const {
+                AttributeUse result = AttributeUse::Unused;
+                for (u32 i = 0; i < 4; i++) {
+                    const auto index = GetAttributeIndexUse(attribute, i);
+                    if (index == AttributeUse::Unused) {
+                        continue;
+                    }
+                    if (result == AttributeUse::Unused || result == index) {
+                        result = index;
+                        continue;
+                    }
+                    LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
+                    if (index == AttributeUse::Perspective) {
+                        result = index;
+                    }
+                }
+                return result;
+            }
        } ps;
    };

--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -28,14 +28,14 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
    UNREACHABLE();
 }

-GPU::GPU(VideoCore::RasterizerInterface& rasterizer) {
+GPU::GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer) {
    memory_manager = std::make_unique<Tegra::MemoryManager>();
    dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
-    maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager);
+    maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
    fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
    kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
-    maxwell_dma = std::make_unique<Engines::MaxwellDMA>(rasterizer, *memory_manager);
-    kepler_memory = std::make_unique<Engines::KeplerMemory>(rasterizer, *memory_manager);
+    maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
+    kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager);
 }

 GPU::~GPU() = default;
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -6,12 +6,15 @@

 #include <array>
 #include <memory>
-#include <vector>
 #include "common/common_types.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/memory_manager.h"

+namespace Core {
+class System;
+}
+
 namespace VideoCore {
 class RasterizerInterface;
 }
@@ -97,7 +100,7 @@ struct FramebufferConfig {

    using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
    TransformFlags transform_flags;
-    MathUtil::Rectangle<int> crop_rect;
+    Common::Rectangle<int> crop_rect;
 };

 namespace Engines {
@@ -118,7 +121,7 @@ enum class EngineID {

 class GPU final {
 public:
-    explicit GPU(VideoCore::RasterizerInterface& rasterizer);
+    explicit GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
    ~GPU();

    struct MethodCall {
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -129,6 +129,15 @@ protected:
        return ++modified_ticks;
    }

+    /// Flushes the specified object, updating appropriate cache state as needed
+    void FlushObject(const T& object) {
+        if (!object->IsDirty()) {
+            return;
+        }
+        object->Flush();
+        object->MarkAsModified(false, *this);
+    }
+
 private:
    /// Returns a list of cached objects from the specified memory region, ordered by access time
    std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
@@ -154,15 +163,6 @@ private:
        return objects;
    }

-    /// Flushes the specified object, updating appropriate cache state as needed
-    void FlushObject(const T& object) {
-        if (!object->IsDirty()) {
-            return;
-        }
-        object->Flush();
-        object->MarkAsModified(false, *this);
-    }
-
    using ObjectSet = std::set<T>;
    using ObjectCache = std::unordered_map<VAddr, T>;
    using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -47,8 +47,8 @@ public:
    /// Attempt to use a faster method to perform a surface copy
    virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                                       const Tegra::Engines::Fermi2D::Regs::Surface& dst,
-                                       const MathUtil::Rectangle<u32>& src_rect,
-                                       const MathUtil::Rectangle<u32>& dst_rect) {
+                                       const Common::Rectangle<u32>& src_rect,
+                                       const Common::Rectangle<u32>& dst_rect) {
        return false;
    }

--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -102,8 +102,8 @@ struct FramebufferCacheKey {

 RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
                                   ScreenInfo& info)
-    : res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info},
-      buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
+    : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, emu_window{window},
+      screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
    // Create sampler objects
    for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
        texture_samplers[i].Create();
@@ -200,7 +200,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
    }

    // Rebinding the VAO invalidates the vertex buffer bindings.
-    gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
+    gpu.dirty_flags.vertex_array.set();

    state.draw.vertex_array = vao_entry.handle;
    return vao_entry.handle;
@@ -210,14 +210,14 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
    const auto& regs = gpu.regs;

-    if (!gpu.dirty_flags.vertex_array)
+    if (gpu.dirty_flags.vertex_array.none())
        return;

    MICROPROFILE_SCOPE(OpenGL_VB);

    // Upload all guest vertex arrays sequentially to our buffer
    for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
-        if (~gpu.dirty_flags.vertex_array & (1u << index))
+        if (!gpu.dirty_flags.vertex_array[index])
            continue;

        const auto& vertex_array = regs.vertex_array[index];
@@ -244,7 +244,7 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
        }
    }

-    gpu.dirty_flags.vertex_array = 0;
+    gpu.dirty_flags.vertex_array.reset();
 }

 DrawParameters RasterizerOpenGL::SetupDraw() {
@@ -488,13 +488,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
    OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
    std::optional<std::size_t> single_color_target) {
    MICROPROFILE_SCOPE(OpenGL_Framebuffer);
-    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
    const auto& regs = gpu.regs;

    const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
                                                 single_color_target};
-    if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 &&
-        !gpu.dirty_flags.zeta_buffer) {
+    if (fb_config_state == current_framebuffer_config_state &&
+        gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
        // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
        // single color targets). This is done because the guest registers may not change but the
        // host framebuffer may contain different attachments
@@ -721,10 +721,10 @@ void RasterizerOpenGL::DrawArrays() {
    // Add space for at least 18 constant buffers
    buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);

-    bool invalidate = buffer_cache.Map(buffer_size);
+    const bool invalidate = buffer_cache.Map(buffer_size);
    if (invalidate) {
        // As all cached buffers are invalidated, we need to recheck their state.
-        gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
+        gpu.dirty_flags.vertex_array.set();
    }

    const GLuint vao = SetupVertexFormat();
@@ -738,19 +738,11 @@ void RasterizerOpenGL::DrawArrays() {
    shader_program_manager->ApplyTo(state);
    state.Apply();

-    // Execute draw call
+    res_cache.SignalPreDrawCall();
    params.DispatchDraw();
-
-    // Disable scissor test
-    state.viewports[0].scissor.enabled = false;
+    res_cache.SignalPostDrawCall();

    accelerate_draw = AccelDraw::Disabled;
-
-    // Unbind textures for potential future use as framebuffer attachments
-    for (auto& texture_unit : state.texture_units) {
-        texture_unit.Unbind();
-    }
-    state.Apply();
 }

 void RasterizerOpenGL::FlushAll() {}
@@ -779,8 +771,8 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {

 bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                                             const Tegra::Engines::Fermi2D::Regs::Surface& dst,
-                                             const MathUtil::Rectangle<u32>& src_rect,
-                                             const MathUtil::Rectangle<u32>& dst_rect) {
+                                             const Common::Rectangle<u32>& src_rect,
+                                             const Common::Rectangle<u32>& dst_rect) {
    MICROPROFILE_SCOPE(OpenGL_Blits);
    res_cache.FermiCopySurface(src, dst, src_rect, dst_rect);
    return true;
@@ -1034,7 +1026,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
    for (std::size_t i = 0; i < viewport_count; i++) {
        auto& viewport = current_state.viewports[i];
        const auto& src = regs.viewports[i];
-        const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
+        const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
        viewport.x = viewport_rect.left;
        viewport.y = viewport_rect.bottom;
        viewport.width = viewport_rect.GetWidth();
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -62,8 +62,8 @@ public:
    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
    bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                               const Tegra::Engines::Fermi2D::Regs::Surface& dst,
-                               const MathUtil::Rectangle<u32>& src_rect,
-                               const MathUtil::Rectangle<u32>& dst_rect) override;
+                               const Common::Rectangle<u32>& src_rect,
+                               const Common::Rectangle<u32>& dst_rect) override;
    bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                           u32 pixel_stride) override;
    bool AccelerateDrawBatch(bool is_indexed) override;
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.

 #include <algorithm>
+#include <optional>
 #include <glad/glad.h>

 #include "common/alignment.h"
@@ -399,7 +400,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
    return format;
 }

-MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
+Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
    u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
    if (IsPixelFormatASTC(pixel_format)) {
        // ASTC formats must stop at the ATSC block size boundary
@@ -423,7 +424,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
        for (u32 i = 0; i < params.depth; i++) {
            MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
                          params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
-                          params.MipBlockDepth(mip_level), params.tile_width_spacing, 1,
+                          params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
                          gl_buffer.data() + offset_gl, gl_size, params.addr + offset);
            offset += layer_size;
            offset_gl += gl_size;
@@ -549,6 +550,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
    // alternatives. This signals a bug on those functions.
    const auto width = static_cast<GLsizei>(params.MipWidth(0));
    const auto height = static_cast<GLsizei>(params.MipHeight(0));
+    memory_size = params.MemorySize();
+    reinterpreted = false;

    const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
    gl_internal_format = format_tuple.internal_format;
@@ -962,30 +965,31 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
    auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
    const auto& regs{gpu.regs};

-    if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) {
+    if (!gpu.dirty_flags.color_buffer[index]) {
        return last_color_buffers[index];
    }
-    gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index));
+    gpu.dirty_flags.color_buffer.reset(index);

    ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);

    if (index >= regs.rt_control.count) {
-        return last_color_buffers[index] = {};
+        return current_color_buffers[index] = {};
    }

    if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
-        return last_color_buffers[index] = {};
+        return current_color_buffers[index] = {};
    }

    const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};

-    return last_color_buffers[index] = GetSurface(color_params, preserve_contents);
+    return current_color_buffers[index] = GetSurface(color_params, preserve_contents);
 }

 void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
    surface->LoadGLBuffer();
    surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
    surface->MarkAsModified(false, *this);
+    surface->MarkForReload(false);
 }

 Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
@@ -997,18 +1001,23 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
    Surface surface{TryGet(params.addr)};
    if (surface) {
        if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
-            // Use the cached surface as-is
+            // Use the cached surface as-is unless it's not synced with memory
+            if (surface->MustReload())
+                LoadSurface(surface);
            return surface;
        } else if (preserve_contents) {
            // If surface parameters changed and we care about keeping the previous data, recreate
            // the surface from the old one
            Surface new_surface{RecreateSurface(surface, params)};
-            Unregister(surface);
+            UnregisterSurface(surface);
            Register(new_surface);
+            if (new_surface->IsUploaded()) {
+                RegisterReinterpretSurface(new_surface);
+            }
            return new_surface;
        } else {
            // Delete the old surface before creating a new one to prevent collisions.
-            Unregister(surface);
+            UnregisterSurface(surface);
        }
    }

@@ -1062,8 +1071,8 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
 }

 static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
-                        const MathUtil::Rectangle<u32>& src_rect,
-                        const MathUtil::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
+                        const Common::Rectangle<u32>& src_rect,
+                        const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
                        GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0,
                        std::size_t cubemap_face = 0) {

@@ -1193,7 +1202,7 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
 void RasterizerCacheOpenGL::FermiCopySurface(
    const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
    const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
-    const MathUtil::Rectangle<u32>& src_rect, const MathUtil::Rectangle<u32>& dst_rect) {
+    const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) {

    const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
    const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
@@ -1257,7 +1266,11 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
    case SurfaceTarget::TextureCubemap:
    case SurfaceTarget::Texture2DArray:
    case SurfaceTarget::TextureCubeArray:
-        FastLayeredCopySurface(old_surface, new_surface);
+        if (old_params.pixel_format == new_params.pixel_format)
+            FastLayeredCopySurface(old_surface, new_surface);
+        else {
+            AccurateCopySurface(old_surface, new_surface);
+        }
        break;
    default:
        LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
@@ -1286,4 +1299,107 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params
    return {};
 }

+static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params,
+                                            u32 height) {
+    for (u32 i = 0; i < params.max_mip_level; i++) {
+        if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) {
+            return {i};
+        }
+    }
+    return {};
+}
+
+static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) {
+    const std::size_t size = params.LayerMemorySize();
+    VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap);
+    for (u32 i = 0; i < params.depth; i++) {
+        if (start == addr) {
+            return {i};
+        }
+        start += size;
+    }
+    return {};
+}
+
+static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface,
+                                       const Surface blitted_surface) {
+    const auto& dst_params = blitted_surface->GetSurfaceParams();
+    const auto& src_params = render_surface->GetSurfaceParams();
+    const std::size_t src_memory_size = src_params.size_in_bytes;
+    const std::optional<u32> level =
+        TryFindBestMipMap(src_memory_size, dst_params, src_params.height);
+    if (level.has_value()) {
+        if (src_params.width == dst_params.MipWidthGobAligned(*level) &&
+            src_params.height == dst_params.MipHeight(*level) &&
+            src_params.block_height >= dst_params.MipBlockHeight(*level)) {
+            const std::optional<u32> slot =
+                TryFindBestLayer(render_surface->GetAddr(), dst_params, *level);
+            if (slot.has_value()) {
+                glCopyImageSubData(render_surface->Texture().handle,
+                                   SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
+                                   blitted_surface->Texture().handle,
+                                   SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot,
+                                   dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1);
+                blitted_surface->MarkAsModified(true, cache);
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
+    const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize();
+    const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize();
+    if (bound2 > bound1)
+        return true;
+    const auto& dst_params = blitted_surface->GetSurfaceParams();
+    const auto& src_params = render_surface->GetSurfaceParams();
+    return (dst_params.component_type != src_params.component_type);
+}
+
+static bool IsReinterpretInvalidSecond(const Surface render_surface,
+                                       const Surface blitted_surface) {
+    const auto& dst_params = blitted_surface->GetSurfaceParams();
+    const auto& src_params = render_surface->GetSurfaceParams();
+    return (dst_params.height > src_params.height && dst_params.width > src_params.width);
+}
+
+bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
+                                                      Surface intersect) {
+    if (IsReinterpretInvalid(triggering_surface, intersect)) {
+        UnregisterSurface(intersect);
+        return false;
+    }
+    if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
+        if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
+            UnregisterSurface(intersect);
+            return false;
+        }
+        FlushObject(intersect);
+        FlushObject(triggering_surface);
+        intersect->MarkForReload(true);
+    }
+    return true;
+}
+
+void RasterizerCacheOpenGL::SignalPreDrawCall() {
+    if (texception && GLAD_GL_ARB_texture_barrier) {
+        glTextureBarrier();
+    }
+    texception = false;
+}
+
+void RasterizerCacheOpenGL::SignalPostDrawCall() {
+    for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
+        if (current_color_buffers[i] != nullptr) {
+            Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr());
+            if (intersect != nullptr) {
+                PartialReinterpretSurface(current_color_buffers[i], intersect);
+                texception = true;
+            }
+        }
+    }
+}
+
 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -28,12 +28,13 @@ namespace OpenGL {

 class CachedSurface;
 using Surface = std::shared_ptr<CachedSurface>;
-using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
+using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;

 using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
 using SurfaceType = VideoCore::Surface::SurfaceType;
 using PixelFormat = VideoCore::Surface::PixelFormat;
 using ComponentType = VideoCore::Surface::ComponentType;
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;

 struct SurfaceParams {
    enum class SurfaceClass {
@@ -71,7 +72,7 @@ struct SurfaceParams {
    }

    /// Returns the rectangle corresponding to this surface
-    MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const;
+    Common::Rectangle<u32> GetRect(u32 mip_level = 0) const;

    /// Returns the total size of this surface in bytes, adjusted for compression
    std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
@@ -140,10 +141,18 @@ struct SurfaceParams {
        return offset;
    }

+    std::size_t GetMipmapSingleSize(u32 mip_level) const {
+        return InnerMipmapMemorySize(mip_level, false, is_layered);
+    }
+
    u32 MipWidth(u32 mip_level) const {
        return std::max(1U, width >> mip_level);
    }

+    u32 MipWidthGobAligned(u32 mip_level) const {
+        return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp());
+    }
+
    u32 MipHeight(u32 mip_level) const {
        return std::max(1U, height >> mip_level);
    }
@@ -346,6 +355,10 @@ public:
        return cached_size_in_bytes;
    }

+    std::size_t GetMemorySize() const {
+        return memory_size;
+    }
+
    void Flush() override {
        FlushGLBuffer();
    }
@@ -395,6 +408,26 @@ public:
                       Tegra::Texture::SwizzleSource swizzle_z,
                       Tegra::Texture::SwizzleSource swizzle_w);

+    void MarkReinterpreted() {
+        reinterpreted = true;
+    }
+
+    bool IsReinterpreted() const {
+        return reinterpreted;
+    }
+
+    void MarkForReload(bool reload) {
+        must_reload = reload;
+    }
+
+    bool MustReload() const {
+        return must_reload;
+    }
+
+    bool IsUploaded() const {
+        return params.identity == SurfaceParams::SurfaceClass::Uploaded;
+    }
+
 private:
    void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);

@@ -408,6 +441,9 @@ private:
    GLenum gl_internal_format{};
    std::size_t cached_size_in_bytes{};
    std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
+    std::size_t memory_size;
+    bool reinterpreted = false;
+    bool must_reload = false;
 };

 class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -430,8 +466,11 @@ public:
    /// Copies the contents of one surface to another
    void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
                          const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
-                          const MathUtil::Rectangle<u32>& src_rect,
-                          const MathUtil::Rectangle<u32>& dst_rect);
+                          const Common::Rectangle<u32>& src_rect,
+                          const Common::Rectangle<u32>& dst_rect);
+
+    void SignalPreDrawCall();
+    void SignalPostDrawCall();

 private:
    void LoadSurface(const Surface& surface);
@@ -449,6 +488,10 @@ private:
    /// Tries to get a reserved surface for the specified parameters
    Surface TryGetReservedSurface(const SurfaceParams& params);

+    // Partialy reinterpret a surface based on a triggering_surface that collides with it.
+    // returns true if the reinterpret was successful, false in case it was not.
+    bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);
+
    /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
    void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
    void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
@@ -465,12 +508,50 @@ private:
    OGLFramebuffer read_framebuffer;
    OGLFramebuffer draw_framebuffer;

+    bool texception = false;
+
    /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
    /// using the new format.
    OGLBuffer copy_pbo;

-    std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers;
+    std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
+    std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
    Surface last_depth_buffer;
+
+    using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>;
+    using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
+
+    static auto GetReinterpretInterval(const Surface& object) {
+        return SurfaceInterval::right_open(object->GetAddr() + 1,
+                                           object->GetAddr() + object->GetMemorySize() - 1);
+    }
+
+    // Reinterpreted surfaces are very fragil as the game may keep rendering into them.
+    SurfaceIntervalCache reinterpreted_surfaces;
+
+    void RegisterReinterpretSurface(Surface reinterpret_surface) {
+        auto interval = GetReinterpretInterval(reinterpret_surface);
+        reinterpreted_surfaces.insert({interval, reinterpret_surface});
+        reinterpret_surface->MarkReinterpreted();
+    }
+
+    Surface CollideOnReinterpretedSurface(VAddr addr) const {
+        const SurfaceInterval interval{addr};
+        for (auto& pair :
+             boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
+            return pair.second;
+        }
+        return nullptr;
+    }
+
+    /// Unregisters an object from the cache
+    void UnregisterSurface(const Surface& object) {
+        if (object->IsReinterpreted()) {
+            auto interval = GetReinterpretInterval(object);
+            reinterpreted_surfaces.erase(interval);
+        }
+        Unregister(object);
+    }
 };

 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -20,6 +20,7 @@
 namespace OpenGL::GLShader {

 using Tegra::Shader::Attribute;
+using Tegra::Shader::AttributeUse;
 using Tegra::Shader::Header;
 using Tegra::Shader::IpaInterpMode;
 using Tegra::Shader::IpaMode;
@@ -288,34 +289,22 @@ private:
        code.AddNewLine();
    }

-    std::string GetInputFlags(const IpaMode& input_mode) {
-        const IpaSampleMode sample_mode = input_mode.sampling_mode;
-        const IpaInterpMode interp_mode = input_mode.interpolation_mode;
+    std::string GetInputFlags(AttributeUse attribute) {
        std::string out;

-        switch (interp_mode) {
-        case IpaInterpMode::Flat:
+        switch (attribute) {
+        case AttributeUse::Constant:
            out += "flat ";
            break;
-        case IpaInterpMode::Linear:
+        case AttributeUse::ScreenLinear:
            out += "noperspective ";
            break;
-        case IpaInterpMode::Perspective:
+        case AttributeUse::Perspective:
            // Default, Smooth
            break;
        default:
-            UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode));
-        }
-        switch (sample_mode) {
-        case IpaSampleMode::Centroid:
-            // It can be implemented with the "centroid " keyword in GLSL
-            UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid");
-            break;
-        case IpaSampleMode::Default:
-            // Default, n/a
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode));
+            LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
+            UNREACHABLE();
        }
        return out;
    }
@@ -324,16 +313,11 @@ private:
        const auto& attributes = ir.GetInputAttributes();
        for (const auto element : attributes) {
            const Attribute::Index index = element.first;
-            const IpaMode& input_mode = *element.second.begin();
            if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
                // Skip when it's not a generic attribute
                continue;
            }

-            ASSERT(element.second.size() > 0);
-            UNIMPLEMENTED_IF_MSG(element.second.size() > 1,
-                                 "Multiple input flag modes are not supported in GLSL");
-
            // TODO(bunnei): Use proper number of elements for these
            u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
            if (stage != ShaderStage::Vertex) {
@@ -345,8 +329,14 @@ private:
            if (stage == ShaderStage::Geometry) {
                attr = "gs_" + attr + "[]";
            }
-            code.AddLine("layout (location = " + std::to_string(idx) + ") " +
-                         GetInputFlags(input_mode) + "in vec4 " + attr + ';');
+            std::string suffix;
+            if (stage == ShaderStage::Fragment) {
+                const auto input_mode =
+                    header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION);
+                suffix = GetInputFlags(input_mode);
+            }
+            code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " +
+                         attr + ';');
        }
        if (!attributes.empty())
            code.AddNewLine();
@@ -1584,4 +1574,4 @@ ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const st
    return {decompiler.GetResult(), decompiler.GetShaderEntries()};
 }

-} // namespace OpenGL::GLShader
+} // namespace OpenGL::GLShader
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -2,8 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#pragma once
-
 #include <cstring>
 #include <fmt/format.h>
 #include <lz4.h>
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -124,7 +124,7 @@ layout (location = 5) out vec4 FragColor5;
 layout (location = 6) out vec4 FragColor6;
 layout (location = 7) out vec4 FragColor7;

-layout (location = 0) in vec4 position;
+layout (location = 0) in noperspective vec4 position;

 layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
    vec4 viewport_flip;
@@ -172,4 +172,4 @@ void main() {
    return {out, program.second};
 }

-} // namespace OpenGL::GLShader
+} // namespace OpenGL::GLShader
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -11,7 +11,9 @@
 namespace OpenGL {

 OpenGLState OpenGLState::cur_state;
+
 bool OpenGLState::s_rgb_used;
+
 OpenGLState::OpenGLState() {
    // These all match default OpenGL values
    geometry_shaders.enabled = false;
@@ -112,7 +114,6 @@ void OpenGLState::ApplyDefaultState() {
 }

 void OpenGLState::ApplySRgb() const {
-    // sRGB
    if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) {
        if (framebuffer_srgb.enabled) {
            // Track if sRGB is used
@@ -125,23 +126,20 @@ void OpenGLState::ApplySRgb() const {
 }

 void OpenGLState::ApplyCulling() const {
-    // Culling
-    const bool cull_changed = cull.enabled != cur_state.cull.enabled;
-    if (cull_changed) {
+    if (cull.enabled != cur_state.cull.enabled) {
        if (cull.enabled) {
            glEnable(GL_CULL_FACE);
        } else {
            glDisable(GL_CULL_FACE);
        }
    }
-    if (cull.enabled) {
-        if (cull_changed || cull.mode != cur_state.cull.mode) {
-            glCullFace(cull.mode);
-        }

-        if (cull_changed || cull.front_face != cur_state.cull.front_face) {
-            glFrontFace(cull.front_face);
-        }
+    if (cull.mode != cur_state.cull.mode) {
+        glCullFace(cull.mode);
+    }
+
+    if (cull.front_face != cur_state.cull.front_face) {
+        glFrontFace(cull.front_face);
    }
 }

@@ -172,72 +170,63 @@ void OpenGLState::ApplyColorMask() const {
 }

 void OpenGLState::ApplyDepth() const {
-    // Depth test
-    const bool depth_test_changed = depth.test_enabled != cur_state.depth.test_enabled;
-    if (depth_test_changed) {
+    if (depth.test_enabled != cur_state.depth.test_enabled) {
        if (depth.test_enabled) {
            glEnable(GL_DEPTH_TEST);
        } else {
            glDisable(GL_DEPTH_TEST);
        }
    }
-    if (depth.test_enabled &&
-        (depth_test_changed || depth.test_func != cur_state.depth.test_func)) {
+
+    if (depth.test_func != cur_state.depth.test_func) {
        glDepthFunc(depth.test_func);
    }
-    // Depth mask
+
    if (depth.write_mask != cur_state.depth.write_mask) {
        glDepthMask(depth.write_mask);
    }
 }

 void OpenGLState::ApplyPrimitiveRestart() const {
-    const bool primitive_restart_changed =
-        primitive_restart.enabled != cur_state.primitive_restart.enabled;
-    if (primitive_restart_changed) {
+    if (primitive_restart.enabled != cur_state.primitive_restart.enabled) {
        if (primitive_restart.enabled) {
            glEnable(GL_PRIMITIVE_RESTART);
        } else {
            glDisable(GL_PRIMITIVE_RESTART);
        }
    }
-    if (primitive_restart_changed ||
-        (primitive_restart.enabled &&
-         primitive_restart.index != cur_state.primitive_restart.index)) {
+
+    if (primitive_restart.index != cur_state.primitive_restart.index) {
        glPrimitiveRestartIndex(primitive_restart.index);
    }
 }

 void OpenGLState::ApplyStencilTest() const {
-    const bool stencil_test_changed = stencil.test_enabled != cur_state.stencil.test_enabled;
-    if (stencil_test_changed) {
+    if (stencil.test_enabled != cur_state.stencil.test_enabled) {
        if (stencil.test_enabled) {
            glEnable(GL_STENCIL_TEST);
        } else {
            glDisable(GL_STENCIL_TEST);
        }
    }
-    if (stencil.test_enabled) {
-        auto config_stencil = [stencil_test_changed](GLenum face, const auto& config,
-                                                     const auto& prev_config) {
-            if (stencil_test_changed || config.test_func != prev_config.test_func ||
-                config.test_ref != prev_config.test_ref ||
-                config.test_mask != prev_config.test_mask) {
-                glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
-            }
-            if (stencil_test_changed || config.action_depth_fail != prev_config.action_depth_fail ||
-                config.action_depth_pass != prev_config.action_depth_pass ||
-                config.action_stencil_fail != prev_config.action_stencil_fail) {
-                glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
-                                    config.action_depth_pass);
-            }
-            if (config.write_mask != prev_config.write_mask) {
-                glStencilMaskSeparate(face, config.write_mask);
-            }
-        };
-        config_stencil(GL_FRONT, stencil.front, cur_state.stencil.front);
-        config_stencil(GL_BACK, stencil.back, cur_state.stencil.back);
-    }
+
+    const auto ConfigStencil = [](GLenum face, const auto& config, const auto& prev_config) {
+        if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref ||
+            config.test_mask != prev_config.test_mask) {
+            glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
+        }
+        if (config.action_depth_fail != prev_config.action_depth_fail ||
+            config.action_depth_pass != prev_config.action_depth_pass ||
+            config.action_stencil_fail != prev_config.action_stencil_fail) {
+            glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
+                                config.action_depth_pass);
+        }
+        if (config.write_mask != prev_config.write_mask) {
+            glStencilMaskSeparate(face, config.write_mask);
+        }
+    };
+    ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front);
+    ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
 }
 // Viewport does not affects glClearBuffer so emulate viewport using scissor test
 void OpenGLState::EmulateViewportWithScissor() {
@@ -278,19 +267,18 @@ void OpenGLState::ApplyViewport() const {
                updated.depth_range_far != current.depth_range_far) {
                glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
            }
-            const bool scissor_changed = updated.scissor.enabled != current.scissor.enabled;
-            if (scissor_changed) {
+
+            if (updated.scissor.enabled != current.scissor.enabled) {
                if (updated.scissor.enabled) {
                    glEnablei(GL_SCISSOR_TEST, i);
                } else {
                    glDisablei(GL_SCISSOR_TEST, i);
                }
            }
-            if (updated.scissor.enabled &&
-                (scissor_changed || updated.scissor.x != current.scissor.x ||
-                 updated.scissor.y != current.scissor.y ||
-                 updated.scissor.width != current.scissor.width ||
-                 updated.scissor.height != current.scissor.height)) {
+
+            if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
+                updated.scissor.width != current.scissor.width ||
+                updated.scissor.height != current.scissor.height) {
                glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
                                 updated.scissor.height);
            }
@@ -302,22 +290,23 @@ void OpenGLState::ApplyViewport() const {
            updated.height != current.height) {
            glViewport(updated.x, updated.y, updated.width, updated.height);
        }
+
        if (updated.depth_range_near != current.depth_range_near ||
            updated.depth_range_far != current.depth_range_far) {
            glDepthRange(updated.depth_range_near, updated.depth_range_far);
        }
-        const bool scissor_changed = updated.scissor.enabled != current.scissor.enabled;
-        if (scissor_changed) {
+
+        if (updated.scissor.enabled != current.scissor.enabled) {
            if (updated.scissor.enabled) {
                glEnable(GL_SCISSOR_TEST);
            } else {
                glDisable(GL_SCISSOR_TEST);
            }
        }
-        if (updated.scissor.enabled && (scissor_changed || updated.scissor.x != current.scissor.x ||
-                                        updated.scissor.y != current.scissor.y ||
-                                        updated.scissor.width != current.scissor.width ||
-                                        updated.scissor.height != current.scissor.height)) {
+
+        if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
+            updated.scissor.width != current.scissor.width ||
+            updated.scissor.height != current.scissor.height) {
            glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width,
                      updated.scissor.height);
        }
@@ -327,8 +316,7 @@ void OpenGLState::ApplyViewport() const {
 void OpenGLState::ApplyGlobalBlending() const {
    const Blend& current = cur_state.blend[0];
    const Blend& updated = blend[0];
-    const bool blend_changed = updated.enabled != current.enabled;
-    if (blend_changed) {
+    if (updated.enabled != current.enabled) {
        if (updated.enabled) {
            glEnable(GL_BLEND);
        } else {
@@ -338,15 +326,14 @@ void OpenGLState::ApplyGlobalBlending() const {
    if (!updated.enabled) {
        return;
    }
-    if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
+    if (updated.src_rgb_func != current.src_rgb_func ||
        updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
        updated.dst_a_func != current.dst_a_func) {
        glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
                            updated.dst_a_func);
    }

-    if (blend_changed || updated.rgb_equation != current.rgb_equation ||
-        updated.a_equation != current.a_equation) {
+    if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
        glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
    }
 }
@@ -354,26 +341,22 @@ void OpenGLState::ApplyGlobalBlending() const {
 void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
    const Blend& updated = blend[target];
    const Blend& current = cur_state.blend[target];
-    const bool blend_changed = updated.enabled != current.enabled || force;
-    if (blend_changed) {
+    if (updated.enabled != current.enabled || force) {
        if (updated.enabled) {
            glEnablei(GL_BLEND, static_cast<GLuint>(target));
        } else {
            glDisablei(GL_BLEND, static_cast<GLuint>(target));
        }
    }
-    if (!updated.enabled) {
-        return;
-    }
-    if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
+
+    if (updated.src_rgb_func != current.src_rgb_func ||
        updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
        updated.dst_a_func != current.dst_a_func) {
        glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
                             updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
    }

-    if (blend_changed || updated.rgb_equation != current.rgb_equation ||
-        updated.a_equation != current.a_equation) {
+    if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
        glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
                                 updated.a_equation);
    }
@@ -397,8 +380,7 @@ void OpenGLState::ApplyBlending() const {
 }

 void OpenGLState::ApplyLogicOp() const {
-    const bool logic_op_changed = logic_op.enabled != cur_state.logic_op.enabled;
-    if (logic_op_changed) {
+    if (logic_op.enabled != cur_state.logic_op.enabled) {
        if (logic_op.enabled) {
            glEnable(GL_COLOR_LOGIC_OP);
        } else {
@@ -406,14 +388,12 @@ void OpenGLState::ApplyLogicOp() const {
        }
    }

-    if (logic_op.enabled &&
-        (logic_op_changed || logic_op.operation != cur_state.logic_op.operation)) {
+    if (logic_op.operation != cur_state.logic_op.operation) {
        glLogicOp(logic_op.operation);
    }
 }

 void OpenGLState::ApplyPolygonOffset() const {
-
    const bool fill_enable_changed =
        polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable;
    const bool line_enable_changed =
@@ -448,9 +428,7 @@ void OpenGLState::ApplyPolygonOffset() const {
        }
    }

-    if ((polygon_offset.fill_enable || polygon_offset.line_enable || polygon_offset.point_enable) &&
-        (factor_changed || units_changed || clamp_changed)) {
-
+    if (factor_changed || units_changed || clamp_changed) {
        if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
            glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
        } else {
@@ -483,7 +461,7 @@ void OpenGLState::ApplyTextures() const {

    if (has_delta) {
        glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
-                       textures.data());
+                       textures.data() + first);
    }
 }

@@ -504,7 +482,7 @@ void OpenGLState::ApplySamplers() const {
    }
    if (has_delta) {
        glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
-                       samplers.data());
+                       samplers.data() + first);
    }
 }

@@ -528,9 +506,9 @@ void OpenGLState::ApplyDepthClamp() const {
        depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
        return;
    }
-    if (depth_clamp.far_plane != depth_clamp.near_plane) {
-        UNIMPLEMENTED_MSG("Unimplemented Depth Clamp Separation!");
-    }
+    UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
+                         "Unimplemented Depth Clamp Separation!");
+
    if (depth_clamp.far_plane || depth_clamp.near_plane) {
        glEnable(GL_DEPTH_CLAMP);
    } else {
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -257,6 +257,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
                                                 const Tegra::FramebufferConfig& framebuffer) {
    texture.width = framebuffer.width;
    texture.height = framebuffer.height;
+    texture.pixel_format = framebuffer.pixel_format;

    GLint internal_format;
    switch (framebuffer.pixel_format) {
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -39,7 +39,7 @@ struct TextureInfo {
 /// Structure used for storing information about the display target for the Switch screen
 struct ScreenInfo {
    GLuint display_texture;
-    const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
+    const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
    TextureInfo texture;
 };

@@ -102,7 +102,7 @@ private:

    /// Used for transforming the framebuffer orientation
    Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
-    MathUtil::Rectangle<int> framebuffer_crop_rect;
+    Common::Rectangle<int> framebuffer_crop_rect;
 };

 } // namespace OpenGL
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -0,0 +1,252 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <optional>
+#include <tuple>
+#include <vector>
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+
+namespace Vulkan {
+
+// TODO(Rodrigo): Fine tune this number
+constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024;
+
+class VKMemoryAllocation final {
+public:
+    explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
+                                vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type)
+        : device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size},
+          shifted_type{ShiftType(type)}, is_mappable{properties &
+                                                     vk::MemoryPropertyFlagBits::eHostVisible} {
+        if (is_mappable) {
+            const auto dev = device.GetLogical();
+            const auto& dld = device.GetDispatchLoader();
+            base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld));
+        }
+    }
+
+    ~VKMemoryAllocation() {
+        const auto dev = device.GetLogical();
+        const auto& dld = device.GetDispatchLoader();
+        if (is_mappable)
+            dev.unmapMemory(memory, dld);
+        dev.free(memory, nullptr, dld);
+    }
+
+    VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
+        auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size),
+                                        static_cast<u64>(alignment));
+        if (!found) {
+            found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
+                                       static_cast<u64>(alignment));
+            if (!found) {
+                // Signal out of memory, it'll try to do more allocations.
+                return nullptr;
+            }
+        }
+        u8* address = is_mappable ? base_address + *found : nullptr;
+        auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found,
+                                                           *found + commit_size);
+        commits.push_back(commit.get());
+
+        // Last commit's address is highly probable to be free.
+        free_iterator = *found + commit_size;
+
+        return commit;
+    }
+
+    void Free(const VKMemoryCommitImpl* commit) {
+        ASSERT(commit);
+        const auto it =
+            std::find_if(commits.begin(), commits.end(),
+                         [&](const auto& stored_commit) { return stored_commit == commit; });
+        if (it == commits.end()) {
+            LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!");
+            UNREACHABLE();
+            return;
+        }
+        commits.erase(it);
+    }
+
+    /// Returns whether this allocation is compatible with the arguments.
+    bool IsCompatible(vk::MemoryPropertyFlags wanted_properties, u32 type_mask) const {
+        return (wanted_properties & properties) != vk::MemoryPropertyFlagBits(0) &&
+               (type_mask & shifted_type) != 0;
+    }
+
+private:
+    static constexpr u32 ShiftType(u32 type) {
+        return 1U << type;
+    }
+
+    /// A memory allocator, it may return a free region between "start" and "end" with the solicited
+    /// requeriments.
+    std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
+        u64 iterator = start;
+        while (iterator + size < end) {
+            const u64 try_left = Common::AlignUp(iterator, alignment);
+            const u64 try_right = try_left + size;
+
+            bool overlap = false;
+            for (const auto& commit : commits) {
+                const auto [commit_left, commit_right] = commit->interval;
+                if (try_left < commit_right && commit_left < try_right) {
+                    // There's an overlap, continue the search where the overlapping commit ends.
+                    iterator = commit_right;
+                    overlap = true;
+                    break;
+                }
+            }
+            if (!overlap) {
+                // A free address has been found.
+                return try_left;
+            }
+        }
+        // No free regions where found, return an empty optional.
+        return std::nullopt;
+    }
+
+    const VKDevice& device;                   ///< Vulkan device.
+    const vk::DeviceMemory memory;            ///< Vulkan memory allocation handler.
+    const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
+    const u64 alloc_size;                     ///< Size of this allocation.
+    const u32 shifted_type;                   ///< Stored Vulkan type of this allocation, shifted.
+    const bool is_mappable;                   ///< Whether the allocation is mappable.
+
+    /// Base address of the mapped pointer.
+    u8* base_address{};
+
+    /// Hints where the next free region is likely going to be.
+    u64 free_iterator{};
+
+    /// Stores all commits done from this allocation.
+    std::vector<const VKMemoryCommitImpl*> commits;
+};
+
+VKMemoryManager::VKMemoryManager(const VKDevice& device)
+    : device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())},
+      is_memory_unified{GetMemoryUnified(props)} {}
+
+VKMemoryManager::~VKMemoryManager() = default;
+
+VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) {
+    ASSERT(reqs.size < ALLOC_CHUNK_SIZE);
+
+    // When a host visible commit is asked, search for host visible and coherent, otherwise search
+    // for a fast device local type.
+    const vk::MemoryPropertyFlags wanted_properties =
+        host_visible
+            ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
+            : vk::MemoryPropertyFlagBits::eDeviceLocal;
+
+    const auto TryCommit = [&]() -> VKMemoryCommit {
+        for (auto& alloc : allocs) {
+            if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits))
+                continue;
+
+            if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) {
+                return commit;
+            }
+        }
+        return {};
+    };
+
+    if (auto commit = TryCommit(); commit) {
+        return commit;
+    }
+
+    // Commit has failed, allocate more memory.
+    if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) {
+        // TODO(Rodrigo): Try to use host memory.
+        LOG_CRITICAL(Render_Vulkan, "Ran out of memory!");
+        UNREACHABLE();
+    }
+
+    // Commit again, this time it won't fail since there's a fresh allocation above. If it does,
+    // there's a bug.
+    auto commit = TryCommit();
+    ASSERT(commit);
+    return commit;
+}
+
+VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld);
+    auto commit = Commit(requeriments, host_visible);
+    dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld);
+    return commit;
+}
+
+VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    const auto requeriments = dev.getImageMemoryRequirements(image, dld);
+    auto commit = Commit(requeriments, host_visible);
+    dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld);
+    return commit;
+}
+
+bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
+                                  u64 size) {
+    const u32 type = [&]() {
+        for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
+            const auto flags = props.memoryTypes[type_index].propertyFlags;
+            if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
+                // The type matches in type and in the wanted properties.
+                return type_index;
+            }
+        }
+        LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!");
+        UNREACHABLE();
+        return 0u;
+    }();
+
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+
+    // Try to allocate found type.
+    const vk::MemoryAllocateInfo memory_ai(size, type);
+    vk::DeviceMemory memory;
+    if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
+        res != vk::Result::eSuccess) {
+        LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
+        return false;
+    }
+    allocs.push_back(
+        std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
+    return true;
+}
+
+/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) {
+    for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) {
+        if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
+            // Memory is considered unified when heaps are device local only.
+            return false;
+        }
+    }
+    return true;
+}
+
+VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
+                                       u8* data, u64 begin, u64 end)
+    : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
+
+VKMemoryCommitImpl::~VKMemoryCommitImpl() {
+    allocation->Free(this);
+}
+
+u8* VKMemoryCommitImpl::GetData() const {
+    ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit.");
+    return data;
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_memory_manager.h
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.h
@@ -0,0 +1,87 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <utility>
+#include <vector>
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKMemoryAllocation;
+class VKMemoryCommitImpl;
+
+using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
+
+class VKMemoryManager final {
+public:
+    explicit VKMemoryManager(const VKDevice& device);
+    ~VKMemoryManager();
+
+    /**
+     * Commits a memory with the specified requeriments.
+     * @param reqs Requeriments returned from a Vulkan call.
+     * @param host_visible Signals the allocator that it *must* use host visible and coherent
+     * memory. When passing false, it will try to allocate device local memory.
+     * @returns A memory commit.
+     */
+    VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible);
+
+    /// Commits memory required by the buffer and binds it.
+    VKMemoryCommit Commit(vk::Buffer buffer, bool host_visible);
+
+    /// Commits memory required by the image and binds it.
+    VKMemoryCommit Commit(vk::Image image, bool host_visible);
+
+    /// Returns true if the memory allocations are done always in host visible and coherent memory.
+    bool IsMemoryUnified() const {
+        return is_memory_unified;
+    }
+
+private:
+    /// Allocates a chunk of memory.
+    bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
+
+    /// Returns true if the device uses an unified memory model.
+    static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props);
+
+    const VKDevice& device;                                  ///< Device handler.
+    const vk::PhysicalDeviceMemoryProperties props;          ///< Physical device properties.
+    const bool is_memory_unified;                            ///< True if memory model is unified.
+    std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations.
+};
+
+class VKMemoryCommitImpl final {
+    friend VKMemoryAllocation;
+
+public:
+    explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data,
+                                u64 begin, u64 end);
+    ~VKMemoryCommitImpl();
+
+    /// Returns the writeable memory map. The commit has to be mappable.
+    u8* GetData() const;
+
+    /// Returns the Vulkan memory handler.
+    vk::DeviceMemory GetMemory() const {
+        return memory;
+    }
+
+    /// Returns the start position of the commit relative to the allocation.
+    vk::DeviceSize GetOffset() const {
+        return static_cast<vk::DeviceSize>(interval.first);
+    }
+
+private:
+    std::pair<u64, u64> interval{};   ///< Interval where the commit exists.
+    vk::DeviceMemory memory;          ///< Vulkan device memory handler.
+    VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
+    u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included.
+};
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
@@ -125,11 +125,12 @@ void VKFence::Protect(VKResource* resource) {
    protected_resources.push_back(resource);
 }

-void VKFence::Unprotect(const VKResource* resource) {
+void VKFence::Unprotect(VKResource* resource) {
    const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
-    if (it != protected_resources.end()) {
-        protected_resources.erase(it);
-    }
+    ASSERT(it != protected_resources.end());
+
+    resource->OnFenceRemoval(this);
+    protected_resources.erase(it);
 }

 VKFenceWatch::VKFenceWatch() = default;
@@ -141,12 +142,11 @@ VKFenceWatch::~VKFenceWatch() {
 }

 void VKFenceWatch::Wait() {
-    if (!fence) {
+    if (fence == nullptr) {
        return;
    }
    fence->Wait();
    fence->Unprotect(this);
-    fence = nullptr;
 }

 void VKFenceWatch::Watch(VKFence& new_fence) {
--- a/src/video_core/renderer_vulkan/vk_resource_manager.h
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.h
@@ -63,7 +63,7 @@ public:
    void Protect(VKResource* resource);

    /// Removes protection for a resource.
-    void Unprotect(const VKResource* resource);
+    void Unprotect(VKResource* resource);

    /// Retreives the fence.
    operator vk::Fence() const {
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -0,0 +1,60 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+
+namespace Vulkan {
+
+VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
+    : device{device}, resource_manager{resource_manager} {
+    next_fence = &resource_manager.CommitFence();
+    AllocateNewContext();
+}
+
+VKScheduler::~VKScheduler() = default;
+
+VKExecutionContext VKScheduler::GetExecutionContext() const {
+    return VKExecutionContext(current_fence, current_cmdbuf);
+}
+
+VKExecutionContext VKScheduler::Flush(vk::Semaphore semaphore) {
+    SubmitExecution(semaphore);
+    current_fence->Release();
+    AllocateNewContext();
+    return GetExecutionContext();
+}
+
+VKExecutionContext VKScheduler::Finish(vk::Semaphore semaphore) {
+    SubmitExecution(semaphore);
+    current_fence->Wait();
+    current_fence->Release();
+    AllocateNewContext();
+    return GetExecutionContext();
+}
+
+void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
+    const auto& dld = device.GetDispatchLoader();
+    current_cmdbuf.end(dld);
+
+    const auto queue = device.GetGraphicsQueue();
+    const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1u : 0u,
+                                     &semaphore);
+    queue.submit({submit_info}, *current_fence, dld);
+}
+
+void VKScheduler::AllocateNewContext() {
+    current_fence = next_fence;
+    current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
+    next_fence = &resource_manager.CommitFence();
+
+    const auto& dld = device.GetDispatchLoader();
+    current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, dld);
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -0,0 +1,69 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKExecutionContext;
+class VKFence;
+class VKResourceManager;
+
+/// The scheduler abstracts command buffer and fence management with an interface that's able to do
+/// OpenGL-like operations on Vulkan command buffers.
+class VKScheduler {
+public:
+    explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
+    ~VKScheduler();
+
+    /// Gets the current execution context.
+    [[nodiscard]] VKExecutionContext GetExecutionContext() const;
+
+    /// Sends the current execution context to the GPU. It invalidates the current execution context
+    /// and returns a new one.
+    VKExecutionContext Flush(vk::Semaphore semaphore = nullptr);
+
+    /// Sends the current execution context to the GPU and waits for it to complete. It invalidates
+    /// the current execution context and returns a new one.
+    VKExecutionContext Finish(vk::Semaphore semaphore = nullptr);
+
+private:
+    void SubmitExecution(vk::Semaphore semaphore);
+
+    void AllocateNewContext();
+
+    const VKDevice& device;
+    VKResourceManager& resource_manager;
+    vk::CommandBuffer current_cmdbuf;
+    VKFence* current_fence = nullptr;
+    VKFence* next_fence = nullptr;
+};
+
+class VKExecutionContext {
+    friend class VKScheduler;
+
+public:
+    VKExecutionContext() = default;
+
+    VKFence& GetFence() const {
+        return *fence;
+    }
+
+    vk::CommandBuffer GetCommandBuffer() const {
+        return cmdbuf;
+    }
+
+private:
+    explicit VKExecutionContext(VKFence* fence, vk::CommandBuffer cmdbuf)
+        : fence{fence}, cmdbuf{cmdbuf} {}
+
+    VKFence* fence{};
+    vk::CommandBuffer cmdbuf;
+};
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -0,0 +1,90 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+#include <optional>
+#include <vector>
+
+#include "common/assert.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_stream_buffer.h"
+
+namespace Vulkan {
+
+constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
+constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
+
+VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
+                               VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
+                               vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
+    : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
+                                                                                   pipeline_stage} {
+    CreateBuffers(memory_manager, usage);
+    ReserveWatches(WATCHES_INITIAL_RESERVE);
+}
+
+VKStreamBuffer::~VKStreamBuffer() = default;
+
+std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
+    ASSERT(size <= buffer_size);
+    mapped_size = size;
+
+    if (offset + size > buffer_size) {
+        // The buffer would overflow, save the amount of used buffers, signal an invalidation and
+        // reset the state.
+        invalidation_mark = used_watches;
+        used_watches = 0;
+        offset = 0;
+    }
+
+    return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
+}
+
+VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
+    ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
+
+    if (invalidation_mark) {
+        // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
+        exctx = scheduler.Flush();
+        std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
+                      [&](auto& resource) { resource->Wait(); });
+        invalidation_mark = std::nullopt;
+    }
+
+    if (used_watches + 1 >= watches.size()) {
+        // Ensure that there are enough watches.
+        ReserveWatches(WATCHES_RESERVE_CHUNK);
+    }
+    // Add a watch for this allocation.
+    watches[used_watches++]->Watch(exctx.GetFence());
+
+    offset += size;
+
+    return exctx;
+}
+
+void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
+    const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
+                                         nullptr);
+
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
+    commit = memory_manager.Commit(*buffer, true);
+    mapped_pointer = commit->GetData();
+}
+
+void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
+    const std::size_t previous_size = watches.size();
+    watches.resize(previous_size + grow_size);
+    std::generate(watches.begin() + previous_size, watches.end(),
+                  []() { return std::make_unique<VKFenceWatch>(); });
+}
+
+} // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -0,0 +1,72 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <tuple>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKFence;
+class VKFenceWatch;
+class VKResourceManager;
+class VKScheduler;
+
+class VKStreamBuffer {
+public:
+    explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
+                            VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
+                            vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
+    ~VKStreamBuffer();
+
+    /**
+     * Reserves a region of memory from the stream buffer.
+     * @param size Size to reserve.
+     * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
+     * offset and a boolean that's true when buffer has been invalidated.
+     */
+    std::tuple<u8*, u64, bool> Reserve(u64 size);
+
+    /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
+    [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size);
+
+    vk::Buffer GetBuffer() const {
+        return *buffer;
+    }
+
+private:
+    /// Creates Vulkan buffer handles committing the required the required memory.
+    void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
+
+    /// Increases the amount of watches available.
+    void ReserveWatches(std::size_t grow_size);
+
+    const VKDevice& device;                      ///< Vulkan device manager.
+    VKScheduler& scheduler;                      ///< Command scheduler.
+    const u64 buffer_size;                       ///< Total size of the stream buffer.
+    const vk::AccessFlags access;                ///< Access usage of this stream buffer.
+    const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
+
+    UniqueBuffer buffer;   ///< Mapped buffer.
+    VKMemoryCommit commit; ///< Memory commit.
+    u8* mapped_pointer{};  ///< Pointer to the host visible commit
+
+    u64 offset{};      ///< Buffer iterator.
+    u64 mapped_size{}; ///< Size reserved for the current copy.
+
+    std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
+    std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
+    std::optional<std::size_t>
+        invalidation_mark{}; ///< Number of watches used in the current invalidation.
+};
+
+} // namespace Vulkan
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -48,7 +48,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
                             "Unaligned attribute loads are not supported");

-        Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
+        Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
                                          Tegra::Shader::IpaSampleMode::Default};

        u64 next_element = instr.attribute.fmt20.element;
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -135,7 +135,18 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
                                                instr.ipa.sample_mode.Value()};

        const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
-        const Node value = GetSaturatedFloat(attr, instr.ipa.saturate);
+        Node value = attr;
+        const Tegra::Shader::Attribute::Index index = attribute.index.Value();
+        if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
+            index <= Tegra::Shader::Attribute::Index::Attribute_31) {
+            // TODO(Blinkhawk): There are cases where a perspective attribute use PASS.
+            // In theory by setting them as perspective, OpenGL does the perspective correction.
+            // A way must figured to reverse the last step of it.
+            if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
+                value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
+            }
+        }
+        value = GetSaturatedFloat(value, instr.ipa.saturate);

        SetRegister(bb, instr.gpr0, value);
        break;
@@ -175,4 +186,4 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
    return pc;
 }

-} // namespace VideoCommon::Shader
+} // namespace VideoCommon::Shader
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -20,9 +20,9 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
                return {node, cursor};
        }
        if (const auto conditional = std::get_if<ConditionalNode>(node)) {
-            const auto& code = conditional->GetCode();
-            const auto [found, internal_cursor] =
-                FindOperation(code, static_cast<s64>(code.size() - 1), operation_code);
+            const auto& conditional_code = conditional->GetCode();
+            const auto [found, internal_cursor] = FindOperation(
+                conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
            if (found)
                return {found, cursor};
        }
@@ -58,8 +58,8 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
        return nullptr;
    }
    if (const auto conditional = std::get_if<ConditionalNode>(tracked)) {
-        const auto& code = conditional->GetCode();
-        return TrackCbuf(tracked, code, static_cast<s64>(code.size()));
+        const auto& conditional_code = conditional->GetCode();
+        return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
    }
    return nullptr;
 }
--- a/src/yuzu/debugger/graphics/graphics_surface.cpp
+++ b/src/yuzu/debugger/graphics/graphics_surface.cpp
@@ -398,7 +398,7 @@ void GraphicsSurfaceWidget::OnUpdate() {

    for (unsigned int y = 0; y < surface_height; ++y) {
        for (unsigned int x = 0; x < surface_width; ++x) {
-            Math::Vec4<u8> color;
+            Common::Vec4<u8> color;
            color[0] = texture_data[x + y * surface_width + 0];
            color[1] = texture_data[x + y * surface_width + 1];
            color[2] = texture_data[x + y * surface_width + 2];
Author	SHA1	Message	Date
ReinUsesLisp	e85066dac7	gl_rasterizer: Remove texture unbinding after dispatching a draw call Unbinding was required when OpenGL delete operations didn't unbind a resource if it was bound. This is no longer needed and can be removed.	2019-02-28 00:17:50 -03:00
ReinUsesLisp	bb3ab7d66c	gl_state: Fixup multibind bug	2019-02-28 00:17:03 -03:00
bunnei	49c6d21b31	Merge pull request #2174 from lioncash/fwd service/hid: Amend forward declaration of ServiceManager	2019-02-27 21:20:06 -05:00
bunnei	1b13859af8	Merge pull request #2152 from ReinUsesLisp/vk-stream-buffer vk_stream_buffer: Implement a stream buffer	2019-02-27 21:19:15 -05:00
bunnei	1f5d6a8fed	Merge pull request #2121 from FernandoS27/texception2 Improve the Accuracy of the Rasterizer Cache through a Texception Pass	2019-02-27 21:17:55 -05:00
bunnei	66f4fd4c81	Merge pull request #2172 from lioncash/reorder gl_rasterizer/vk_memory_manager: Silence -Wreorder warnings	2019-02-27 21:14:20 -05:00
Fernando Sahmkow	7ea097e5c2	Devirtualize Register/Unregister and use a wrapper instead.	2019-02-27 21:58:50 -04:00
Fernando Sahmkow	5a9204dbd7	Corrections and redesign.	2019-02-27 21:58:49 -04:00
Fernando Sahmkow	d6b9b51606	Fix linux compile error.	2019-02-27 21:58:48 -04:00
Fernando Sahmkow	e64fa4d2ea	Remove NotifyFrameBuffer as we are doing a texception pass every drawcall.	2019-02-27 21:58:47 -04:00
Fernando Sahmkow	3558c88442	Remove certain optimizations that caused texception to fail in certain scenarios.	2019-02-27 21:58:45 -04:00
Fernando Sahmkow	e9d84ef22c	Bug fixes and formatting	2019-02-27 21:58:44 -04:00
Fernando Sahmkow	5bc82d124c	rasterizer_cache_gl: Implement Texception Pass	2019-02-27 21:58:43 -04:00
Fernando Sahmkow	8932001610	rasterizer_cache_gl: Implement Partial Reinterpretation of Surfaces.	2019-02-27 21:58:40 -04:00
Fernando Sahmkow	44ea2810e4	rasterizer_cache: mark reinterpreted surfaces and add ability to reload marked surfaces on next use.	2019-02-27 21:58:39 -04:00
Fernando Sahmkow	d583fc1e97	rasterizer_cache_gl: Notify on framebuffer change	2019-02-27 21:58:37 -04:00
Fernando Sahmkow	45b6d2d349	rasterizer_cache: Expose FlushObject to Child classes and allow redefining of Register and Unregister	2019-02-27 21:57:33 -04:00
bunnei	f15e2dd881	Merge pull request #2163 from ReinUsesLisp/bitset-dirty maxwell_3d: Use std::bitset to manage dirty flags	2019-02-27 20:50:08 -05:00
Annomatg	ef84c70d22	Speed up memory page mapping (#2141 ) - Memory::MapPages total samplecount was reduced from 4.6% to 1.06%. - From main menu into the game from 1.03% to 0.35%	2019-02-27 17:22:47 -05:00
bunnei	532dda0499	Merge pull request #2176 from lioncash/com audio_core/cubeb_sink: Ensure COM is initialized on Windows prior to calling cubeb_init	2019-02-27 17:12:06 -05:00
Lioncash	1068c1b06f	audio_core/cubeb_sink: Ensure COM is initialized on Windows prior to calling cubeb_init cubeb now requires that COM explicitly be initialized on the thread prior to calling cubeb_init.	2019-02-27 16:14:53 -05:00
Lioncash	6335bf136f	service/hid: Amend forward declaration of ServiceManager The SM namespace is within the Service namespace, so this was forward declaring a type that didn't exist.	2019-02-27 11:36:48 -05:00
bunnei	42f7c11021	Merge pull request #2169 from lioncash/naming audio_core/audio_renderer: Provide names for some parameters of AudioRendererParameter	2019-02-27 11:26:54 -05:00
bunnei	14430f7df9	Merge pull request #2170 from lioncash/emu-window core/frontend/emu_window: Make ClipToTouchScreen a const member function	2019-02-27 11:26:24 -05:00
bunnei	eb5a3dd1c7	Merge pull request #2161 from lioncash/handle-table kernel/handle_table: Allow process capabilities to limit the handle table size	2019-02-27 11:22:26 -05:00
bunnei	be1a1584fc	Merge pull request #2168 from lioncash/cubeb externals: Update cubeb to the master version	2019-02-27 11:20:14 -05:00
bunnei	66e023fba2	Merge pull request #2167 from lioncash/namespace common: Move Quaternion, Rectangle, Vec2, Vec3, and Vec4 into the Common namespace	2019-02-27 11:19:53 -05:00
bunnei	b27e6ad912	Merge pull request #2171 from lioncash/pragma gl_shader_disk_cache: Remove #pragma once from cpp file	2019-02-27 11:19:17 -05:00
Lioncash	16ea93c11e	vk_memory_manager: Reorder constructor initializer list in terms of member declaration order Reorders members in the order that they would actually be initialized in. Silences a -Wreorder warning.	2019-02-27 11:08:19 -05:00
Lioncash	a6a783b3dc	gl_rasterizer: Reorder constructor initializer list in terms of member declaration order Orders the members in the order they would actually be initialized in. Silences a -Wreorder warning.	2019-02-27 11:08:19 -05:00
Lioncash	e7eff72e83	gl_shader_disk_cache: Remove #pragma once from cpp file This is only necessary in headers. Silences a warning with clang.	2019-02-27 11:02:49 -05:00
Lioncash	46b3209abb	core/frontend/emu_window: Make ClipToTouchScreen a const member function This member function doesn't modify instance state, so it can have the const specifier applied to it.	2019-02-27 08:54:42 -05:00
Lioncash	0e1b5acc6a	audio_core/audio_renderer: Name previously unknown parameters of AudioRendererParameter Provides names for previously unknown entries (aside from the two u8 that appear to be padding bytes, and a single word that also appears to be reserved or padding). This will be useful in subsequent changes when unstubbing behavior related to the audio renderer services.	2019-02-27 06:09:07 -05:00
Lioncash	b9238edd0d	common/math_util: Move contents into the Common namespace These types are within the common library, so they should be within the Common namespace.	2019-02-27 03:38:39 -05:00
Lioncash	e56f32a071	externals: Update cubeb to 6f2420de8f155b10330cf973900ac7bdbfee589d Keeps the audio library we use up to date.	2019-02-27 01:21:51 -05:00
Lioncash	1b855efd5e	common/vector_math: Move Vec[x] types into the Common namespace These types are within the common library, so they should be using the Common namespace.	2019-02-26 22:38:36 -05:00
Lioncash	a1574aabd5	common/quaternion: Move Quaternion into the Common namespace Quaternion is within the common library, so it should be using the Common namespace.	2019-02-26 22:31:17 -05:00
bunnei	10d1d58390	Merge pull request #2164 from ReinUsesLisp/configure-blit renderer_opengl: Update pixel format tracking	2019-02-26 12:12:10 -05:00
ReinUsesLisp	d91e35a50a	renderer_opengl: Update pixel format tracking	2019-02-26 03:47:16 -03:00
ReinUsesLisp	5219edd715	maxwell_3d: Use std::bitset to manage dirty flags	2019-02-26 03:01:48 -03:00
ReinUsesLisp	730eb1dad7	vk_stream_buffer: Remove copy code path	2019-02-26 02:09:43 -03:00
bunnei	c3471bf618	Merge pull request #2156 from FreddyFunk/patch-1 file_sys/vfs_vector: Fix ignored offset on Write	2019-02-25 18:28:58 -05:00
bunnei	da1b45de34	Merge pull request #2158 from lioncash/table service/vi: Update IManagerDisplayService's function table	2019-02-25 18:27:43 -05:00
bunnei	1cffd3848b	Merge pull request #2160 from lioncash/audio-warn audio_core: Resolve compilation warnings	2019-02-25 18:25:36 -05:00
bunnei	93c1630570	Merge pull request #2159 from lioncash/warn shader/track: Resolve variable shadowing warnings	2019-02-25 13:26:00 -05:00
Lioncash	d29f9e9709	kernel/handle_table: Make local variables as const where applicable Makes immutable state explicit.	2019-02-25 11:12:38 -05:00
Lioncash	5167d1577d	kernel/handle_table: Allow process capabilities to limit the handle table size The kernel allows restricting the total size of the handle table through the process capability descriptors. Until now, this functionality wasn't hooked up. With this, the process handle tables become properly restricted. In the case of metadata-less executables, the handle table will assume the maximum size is requested, preserving the behavior that existed before these changes.	2019-02-25 11:12:32 -05:00
Lioncash	4f8cd74061	kernel/handle-table: In-class initialize data members Directly initializes members where applicable.	2019-02-25 10:14:05 -05:00
Lioncash	0220862ba5	kernel/handle_table: Resolve truncation warnings Avoids implicit truncation warnings from u32 -> u16 (the truncation is desirable behavior here).	2019-02-25 09:53:21 -05:00
Lioncash	04d7b7e09d	audio_core/cubeb_sink: Initialize CubebSinkStream's last_frame data member Ensures that all member variables are initialized in a deterministic manner across the board.	2019-02-25 09:40:37 -05:00
Lioncash	8250f9bb1c	audio_core/cubeb_sink: Add override specifier to destructor CubebSinkStream inherits from a base class with a virtual destructor, so override can be appended to CubebSinkStream's destructor.	2019-02-25 09:38:27 -05:00
Lioncash	7cdeec20ec	audio_core/cubeb_sink: Resolve variable shadowing warnings in SamplesInQueue The name of the parameter was shadowing the member variable of the same name. Instead, alter the name of the parameter to prevent said shadowing.	2019-02-25 09:28:51 -05:00
Lioncash	a12f4efa2f	audio_core/codec: Resolve truncation warnings within DecodeADPCM The assignments here were performing an implicit truncation from int to s16. Make it explicit that this is desired behavior.	2019-02-25 09:24:39 -05:00
Lioncash	c1b2e35625	shader/track: Resolve variable shadowing warnings	2019-02-25 09:10:59 -05:00
Lioncash	be7dad5e7e	service/vi: Update IManagerDisplayService's function table Amends it to add the 7.0.0+ CreateStrayLayer function.	2019-02-25 08:09:00 -05:00
bunnei	c07987dfab	Merge pull request #2118 from FernandoS27/ipa-improve shader_decompiler: Improve Accuracy of Attribute Interpolation.	2019-02-24 23:04:22 -05:00
bunnei	c4243c07cc	Merge pull request #2119 from FernandoS27/fix-copy rasterizer_cache_gl: Only do fast layered copy on the same format.	2019-02-24 23:03:52 -05:00
bunnei	c6170565b5	Merge pull request #2155 from FearlessTobi/port-4655 Port citra-emu/citra#4655: "Remove GCC version checks"	2019-02-24 23:03:13 -05:00
bunnei	57985fb16a	Merge pull request #2144 from lioncash/factor service/vi: Convert Display and Layer structs into classes	2019-02-24 23:02:50 -05:00
Frederic L	517933adcb	file_sys/vfs_vector: Fix ignored offset on Write	2019-02-25 00:27:49 +01:00
tgsm	030814b1cb	Remove GCC version checks Citra can't be compiled using GCC <7 because of required C++17 support, so these version checks don't need to exist anymore.	2019-02-24 15:24:06 +01:00
bunnei	90c780e6f3	Merge pull request #2139 from degasus/dma_pusher video_core/dma_pusher: The full list of headers at once.	2019-02-24 04:15:49 -05:00
ReinUsesLisp	33a0597603	vk_stream_buffer: Implement a stream buffer This manages two kinds of streaming buffers: one for unified memory models and one for dedicated GPUs. The first one skips the copy from the staging buffer to the real buffer, since it creates an unified buffer. This implementation waits for all fences to finish their operation before "invalidating". This is suboptimal since it should allocate another buffer or start searching from the beginning. There is room for improvement here. This could also handle AMD's "pinned" memory (a heap with 256 MiB) that seems to be designed for buffer streaming.	2019-02-24 04:27:51 -03:00
ReinUsesLisp	281a8bf259	vk_resource_manager: Minor VKFenceWatch changes	2019-02-24 04:19:04 -03:00
bunnei	f7090bacc5	Merge pull request #2146 from ReinUsesLisp/vulkan-scheduler vk_scheduler: Implement a scheduler	2019-02-23 23:32:43 -05:00
bunnei	d062991643	Merge pull request #2150 from ReinUsesLisp/fixup-layer-swizzle gl_rasterizer_cache: Fixup parameter order in layered swizzle	2019-02-23 23:31:34 -05:00
bunnei	4ab978d670	Merge pull request #2151 from ReinUsesLisp/fixup-vk-memory-manager vk_memory_manager: Fixup commit interval allocation	2019-02-23 23:29:53 -05:00
ReinUsesLisp	92050c4d86	vk_memory_manager: Fixup commit interval allocation VKMemoryCommitImpl was using as the end of its interval "begin + end". That ended up wasting memory.	2019-02-24 01:04:41 -03:00
ReinUsesLisp	abef11a540	gl_rasterizer_cache: Fixup parameter order in layered swizzle	2019-02-23 23:27:30 -03:00
ReinUsesLisp	f546fb35ed	vk_scheduler: Implement a scheduler The scheduler abstracts command buffer and fence management with an interface that's able to do OpenGL-like operations on Vulkan command buffers. It returns by value a command buffer and fence that have to be used for subsequent operations until Flush or Finish is executed, after that the current execution context (the pair of command buffers and fences) gets invalidated a new one must be fetched. Thankfully validation layers will quickly detect if this is skipped throwing an error due to modifications to a sent command buffer.	2019-02-22 01:33:32 -03:00
bunnei	94b27bb8a5	Merge pull request #2138 from ReinUsesLisp/vulkan-memory-manager vk_memory_manager: Implement memory manager	2019-02-21 22:26:54 -05:00
Lioncash	90528f1326	service/nvflinger: Store BufferQueue instances as regular data members The NVFlinger service is already passed into services that need to guarantee its lifetime, so the BufferQueue instances will already live as long as they're needed. Making them std::shared_ptr instances in this case is unnecessary.	2019-02-21 22:09:46 -05:00
Lioncash	fd15730767	service/vi/vi_layer: Convert Layer struct into a class Like the previous changes made to the Display struct, this prepares the Layer struct for changes to its interface. Given Layer will be given more invariants in the future, we convert it into a class to better signify that.	2019-02-21 12:13:09 -05:00
Lioncash	fa4dc2cf42	service/nvflinger: Move display specifics over to vi_display With the display and layer structures relocated to the vi service, we can begin giving these a proper interface before beginning to properly support the display types. This converts the display struct into a class and provides it with the necessary functions to preserve behavior within the NVFlinger class.	2019-02-21 12:13:04 -05:00
bunnei	9539c4203b	Merge pull request #2125 from ReinUsesLisp/fixup-glstate gl_state: Synchronize gl_state even when state is disabled	2019-02-20 21:47:46 -05:00
bunnei	ae437320c8	Merge pull request #2130 from lioncash/system_engine video_core: Remove usages of System::GetInstance() within the engines	2019-02-20 21:24:56 -05:00
Jungy	3273f93cd5	Fixes Unicode Key File Directories (#2120 ) * Fixes Unicode Key File Directories Adds code so that when loading a file it converts to UTF16 first, to ensure the files can be opened. Code borrowed from FileUtil::Exists. * Update src/core/crypto/key_manager.cpp Co-Authored-By: Jungorend <Jungorend@users.noreply.github.com> * Update src/core/crypto/key_manager.cpp Co-Authored-By: Jungorend <Jungorend@users.noreply.github.com> * Using FileUtil instead to be cleaner. * Update src/core/crypto/key_manager.cpp Co-Authored-By: Jungorend <Jungorend@users.noreply.github.com>	2019-02-20 21:24:25 -05:00
bunnei	ef559f5741	Merge pull request #2142 from lioncash/relocate service/nvflinger: Relocate definitions of Layer and Display to the vi service	2019-02-20 21:21:55 -05:00
Markus Wick	6dd40976d0	video_core/dma_pusher: Simplyfy Step() logic. As fetching command list headers and and the list of command headers is a fixed 1:1 relation now, they can be implemented within a single call. This cleans up the Step() logic quite a bit.	2019-02-19 10:28:42 +01:00
Markus Wick	717394c980	video_core/dma_pusher: The full list of headers at once. Fetching every u32 from memory leads to a big overhead. So let's fetch all of them as a block if possible. This reduces the Memory::* calls by the dma_pusher by a factor of 10.	2019-02-19 09:58:38 +01:00
ReinUsesLisp	b675c97cdd	vk_memory_manager: Implement memory manager A memory manager object handles the memory allocations for a device. It allocates chunks of Vulkan memory objects and then suballocates.	2019-02-19 03:42:28 -03:00
Lioncash	a8fa5019b5	video_core: Remove usages of System::GetInstance() within the engines Avoids the use of the global accessor in favor of explicitly making the system a dependency within the interface.	2019-02-15 22:06:23 -05:00
ReinUsesLisp	8dfc81239f	gl_state: Synchronize gl_state even when state is disabled There are some potential edge cases where gl_state may fail to track the state if a related state changes while the toggle is disabled or it didn't change. This addresses that.	2019-02-15 01:30:14 -03:00
Fernando Sahmkow	10682ad7e0	shader_decompiler: Improve Accuracy of Attribute Interpolation.	2019-02-14 03:25:07 -04:00
Fernando Sahmkow	bb41683394	rasterizer_cache_gl: Only do fast layered copy on the same format. As glCopyImageSubData does not support different formats.	2019-02-13 16:55:00 -04:00