nfp: Correct erroneous sizeof expression within GetTagInfo()

The previous expression would copy only sizeof(size_t) bytes (8 on a 64-bit platform) rather than the full 10 bytes comprising the uuid member. Given that the source and destination types are the same, we can just use an assignment here instead.
Merge pull request #1628 from greggameplayer/Texture2DArray
2018-11-14 12:53:39 -05:00 · 2018-11-12 21:13:47 -08:00 · 2018-11-12 21:10:08 -08:00 · 2018-11-12 20:13:47 -08:00 · 2018-11-12 21:46:21 -05:00 · 2018-11-12 18:34:54 -08:00
136 changed files with 4060 additions and 2055 deletions
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -121,7 +121,8 @@ CubebSink::CubebSink(std::string target_device_name) {
            const auto collection_end{collection.device + collection.count};
            const auto device{
                std::find_if(collection.device, collection_end, [&](const cubeb_device_info& info) {
-                    return target_device_name == info.friendly_name;
+                    return info.friendly_name != nullptr &&
+                           target_device_name == info.friendly_name;
                })};
            if (device != collection_end) {
                output_device = device->devid;
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -11,7 +11,6 @@
 #include "audio_core/stream.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
-#include "common/microprofile.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
 #include "core/settings.h"
@@ -104,10 +103,7 @@ void Stream::PlayNextBuffer() {
    CoreTiming::ScheduleEventThreadsafe(GetBufferReleaseCycles(*active_buffer), release_event, {});
 }

-MICROPROFILE_DEFINE(AudioOutput, "Audio", "ReleaseActiveBuffer", MP_RGB(100, 100, 255));
-
 void Stream::ReleaseActiveBuffer() {
-    MICROPROFILE_SCOPE(AudioOutput);
    ASSERT(active_buffer);
    released_buffers.push(std::move(active_buffer));
    release_callback();
--- a/src/audio_core/time_stretch.cpp
+++ b/src/audio_core/time_stretch.cpp
@@ -32,10 +32,10 @@ std::size_t TimeStretcher::Process(const s16* in, std::size_t num_in, s16* out,
    // We were given actual_samples number of samples, and num_samples were requested from us.
    double current_ratio = static_cast<double>(num_in) / static_cast<double>(num_out);

-    const double max_latency = 1.0; // seconds
+    const double max_latency = 0.25; // seconds
    const double max_backlog = m_sample_rate * max_latency;
    const double backlog_fullness = m_sound_touch.numSamples() / max_backlog;
-    if (backlog_fullness > 5.0) {
+    if (backlog_fullness > 4.0) {
        // Too many samples in backlog: Don't push anymore on
        num_in = 0;
    }
@@ -49,7 +49,7 @@ std::size_t TimeStretcher::Process(const s16* in, std::size_t num_in, s16* out,

    // This low-pass filter smoothes out variance in the calculated stretch ratio.
    // The time-scale determines how responsive this filter is.
-    constexpr double lpf_time_scale = 2.0; // seconds
+    constexpr double lpf_time_scale = 0.712; // seconds
    const double lpf_gain = 1.0 - std::exp(-time_delta / lpf_time_scale);
    m_stretch_ratio += lpf_gain * (current_ratio - m_stretch_ratio);

--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -12,7 +12,8 @@
 #include <thread>
 #include <vector>
 #ifdef _WIN32
-#include <share.h> // For _SH_DENYWR
+#include <share.h>   // For _SH_DENYWR
+#include <windows.h> // For OutputDebugStringA
 #else
 #define _SH_DENYWR 0
 #endif
@@ -139,12 +140,18 @@ void FileBackend::Write(const Entry& entry) {
    if (!file.IsOpen() || bytes_written > MAX_BYTES_WRITTEN) {
        return;
    }
-    bytes_written += file.WriteString(FormatLogMessage(entry) + '\n');
+    bytes_written += file.WriteString(FormatLogMessage(entry).append(1, '\n'));
    if (entry.log_level >= Level::Error) {
        file.Flush();
    }
 }

+void DebuggerBackend::Write(const Entry& entry) {
+#ifdef _WIN32
+    ::OutputDebugStringA(FormatLogMessage(entry).append(1, '\n').c_str());
+#endif
+}
+
 /// Macro listing all log classes. Code should define CLS and SUB as desired before invoking this.
 #define ALL_LOG_CLASSES()                                                                          \
    CLS(Log)                                                                                       \
--- a/src/common/logging/backend.h
+++ b/src/common/logging/backend.h
@@ -103,6 +103,20 @@ private:
    std::size_t bytes_written;
 };

+/**
+ * Backend that writes to the debugger's output (e.g. Visual Studio's output window); Windows-only
+ */
+class DebuggerBackend : public Backend {
+public:
+    static const char* Name() {
+        return "debugger";
+    }
+    const char* GetName() const override {
+        return Name();
+    }
+    void Write(const Entry& entry) override;
+};
+
 void AddBackend(std::unique_ptr<Backend> backend);

 void RemoveBackend(std::string_view backend_name);
--- a/src/common/telemetry.h
+++ b/src/common/telemetry.h
@@ -153,6 +153,7 @@ struct VisitorInterface : NonCopyable {

    /// Completion method, called once all fields have been visited
    virtual void Complete() = 0;
+    virtual bool SubmitTestcase() = 0;
 };

 /**
@@ -178,6 +179,9 @@ struct NullVisitor : public VisitorInterface {
    void Visit(const Field<std::chrono::microseconds>& /*field*/) override {}

    void Complete() override {}
+    bool SubmitTestcase() override {
+        return false;
+    }
 };

 /// Appends build-specific information to the given FieldCollection,
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -185,7 +185,7 @@ struct System::Impl {
            LOG_CRITICAL(Core, "Failed to obtain loader for {}!", filepath);
            return ResultStatus::ErrorGetLoader;
        }
-        std::pair<boost::optional<u32>, Loader::ResultStatus> system_mode =
+        std::pair<std::optional<u32>, Loader::ResultStatus> system_mode =
            app_loader->LoadKernelSystemMode();

        if (system_mode.second != Loader::ResultStatus::Success) {
@@ -312,6 +312,10 @@ Cpu& System::CurrentCpuCore() {
    return impl->CurrentCpuCore();
 }

+const Cpu& System::CurrentCpuCore() const {
+    return impl->CurrentCpuCore();
+}
+
 System::ResultStatus System::RunLoop(bool tight_loop) {
    return impl->RunLoop(tight_loop);
 }
@@ -342,7 +346,11 @@ PerfStatsResults System::GetAndResetPerfStats() {
    return impl->GetAndResetPerfStats();
 }

-Core::TelemetrySession& System::TelemetrySession() const {
+TelemetrySession& System::TelemetrySession() {
+    return *impl->telemetry_session;
+}
+
+const TelemetrySession& System::TelemetrySession() const {
    return *impl->telemetry_session;
 }

@@ -350,7 +358,11 @@ ARM_Interface& System::CurrentArmInterface() {
    return CurrentCpuCore().ArmInterface();
 }

-std::size_t System::CurrentCoreIndex() {
+const ARM_Interface& System::CurrentArmInterface() const {
+    return CurrentCpuCore().ArmInterface();
+}
+
+std::size_t System::CurrentCoreIndex() const {
    return CurrentCpuCore().CoreIndex();
 }

@@ -358,6 +370,10 @@ Kernel::Scheduler& System::CurrentScheduler() {
    return CurrentCpuCore().Scheduler();
 }

+const Kernel::Scheduler& System::CurrentScheduler() const {
+    return CurrentCpuCore().Scheduler();
+}
+
 Kernel::Scheduler& System::Scheduler(std::size_t core_index) {
    return CpuCore(core_index).Scheduler();
 }
@@ -378,6 +394,10 @@ ARM_Interface& System::ArmInterface(std::size_t core_index) {
    return CpuCore(core_index).ArmInterface();
 }

+const ARM_Interface& System::ArmInterface(std::size_t core_index) const {
+    return CpuCore(core_index).ArmInterface();
+}
+
 Cpu& System::CpuCore(std::size_t core_index) {
    ASSERT(core_index < NUM_CPU_CORES);
    return *impl->cpu_cores[core_index];
@@ -392,6 +412,10 @@ ExclusiveMonitor& System::Monitor() {
    return *impl->cpu_exclusive_monitor;
 }

+const ExclusiveMonitor& System::Monitor() const {
+    return *impl->cpu_exclusive_monitor;
+}
+
 Tegra::GPU& System::GPU() {
    return *impl->gpu_core;
 }
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -129,11 +129,11 @@ public:
     */
    bool IsPoweredOn() const;

-    /**
-     * Returns a reference to the telemetry session for this emulation session.
-     * @returns Reference to the telemetry session.
-     */
-    Core::TelemetrySession& TelemetrySession() const;
+    /// Gets a reference to the telemetry session for this emulation session.
+    Core::TelemetrySession& TelemetrySession();
+
+    /// Gets a const reference to the telemetry session for this emulation session.
+    const Core::TelemetrySession& TelemetrySession() const;

    /// Prepare the core emulation for a reschedule
    void PrepareReschedule();
@@ -144,24 +144,36 @@ public:
    /// Gets an ARM interface to the CPU core that is currently running
    ARM_Interface& CurrentArmInterface();

+    /// Gets an ARM interface to the CPU core that is currently running
+    const ARM_Interface& CurrentArmInterface() const;
+
    /// Gets the index of the currently running CPU core
-    std::size_t CurrentCoreIndex();
+    std::size_t CurrentCoreIndex() const;

    /// Gets the scheduler for the CPU core that is currently running
    Kernel::Scheduler& CurrentScheduler();

-    /// Gets an ARM interface to the CPU core with the specified index
+    /// Gets the scheduler for the CPU core that is currently running
+    const Kernel::Scheduler& CurrentScheduler() const;
+
+    /// Gets a reference to an ARM interface for the CPU core with the specified index
    ARM_Interface& ArmInterface(std::size_t core_index);

+    /// Gets a const reference to an ARM interface from the CPU core with the specified index
+    const ARM_Interface& ArmInterface(std::size_t core_index) const;
+
    /// Gets a CPU interface to the CPU core with the specified index
    Cpu& CpuCore(std::size_t core_index);

    /// Gets a CPU interface to the CPU core with the specified index
    const Cpu& CpuCore(std::size_t core_index) const;

-    /// Gets the exclusive monitor
+    /// Gets a reference to the exclusive monitor
    ExclusiveMonitor& Monitor();

+    /// Gets a constant reference to the exclusive monitor
+    const ExclusiveMonitor& Monitor() const;
+
    /// Gets a mutable reference to the GPU interface
    Tegra::GPU& GPU();

@@ -230,6 +242,9 @@ private:
    /// Returns the currently running CPU core
    Cpu& CurrentCpuCore();

+    /// Returns the currently running CPU core
+    const Cpu& CurrentCpuCore() const;
+
    /**
     * Initialize the emulated system.
     * @param emu_window Reference to the host-system window used for video output and keyboard
--- a/src/core/crypto/key_manager.cpp
+++ b/src/core/crypto/key_manager.cpp
@@ -141,28 +141,28 @@ Key128 DeriveKeyblobMACKey(const Key128& keyblob_key, const Key128& mac_source)
    return mac_key;
 }

-boost::optional<Key128> DeriveSDSeed() {
+std::optional<Key128> DeriveSDSeed() {
    const FileUtil::IOFile save_43(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) +
                                       "/system/save/8000000000000043",
                                   "rb+");
    if (!save_43.IsOpen())
-        return boost::none;
+        return {};

    const FileUtil::IOFile sd_private(
        FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir) + "/Nintendo/Contents/private", "rb+");
    if (!sd_private.IsOpen())
-        return boost::none;
+        return {};

    std::array<u8, 0x10> private_seed{};
    if (sd_private.ReadBytes(private_seed.data(), private_seed.size()) != private_seed.size()) {
-        return boost::none;
+        return {};
    }

    std::array<u8, 0x10> buffer{};
    std::size_t offset = 0;
    for (; offset + 0x10 < save_43.GetSize(); ++offset) {
        if (!save_43.Seek(offset, SEEK_SET)) {
-            return boost::none;
+            return {};
        }

        save_43.ReadBytes(buffer.data(), buffer.size());
@@ -172,12 +172,12 @@ boost::optional<Key128> DeriveSDSeed() {
    }

    if (!save_43.Seek(offset + 0x10, SEEK_SET)) {
-        return boost::none;
+        return {};
    }

    Key128 seed{};
    if (save_43.ReadBytes(seed.data(), seed.size()) != seed.size()) {
-        return boost::none;
+        return {};
    }
    return seed;
 }
@@ -291,26 +291,26 @@ static std::array<u8, target_size> MGF1(const std::array<u8, in_size>& seed) {
 }

 template <size_t size>
-static boost::optional<u64> FindTicketOffset(const std::array<u8, size>& data) {
+static std::optional<u64> FindTicketOffset(const std::array<u8, size>& data) {
    u64 offset = 0;
    for (size_t i = 0x20; i < data.size() - 0x10; ++i) {
        if (data[i] == 0x1) {
            offset = i + 1;
            break;
        } else if (data[i] != 0x0) {
-            return boost::none;
+            return {};
        }
    }

    return offset;
 }

-boost::optional<std::pair<Key128, Key128>> ParseTicket(const TicketRaw& ticket,
-                                                       const RSAKeyPair<2048>& key) {
+std::optional<std::pair<Key128, Key128>> ParseTicket(const TicketRaw& ticket,
+                                                     const RSAKeyPair<2048>& key) {
    u32 cert_authority;
    std::memcpy(&cert_authority, ticket.data() + 0x140, sizeof(cert_authority));
    if (cert_authority == 0)
-        return boost::none;
+        return {};
    if (cert_authority != Common::MakeMagic('R', 'o', 'o', 't')) {
        LOG_INFO(Crypto,
                 "Attempting to parse ticket with non-standard certificate authority {:08X}.",
@@ -321,7 +321,7 @@ boost::optional<std::pair<Key128, Key128>> ParseTicket(const TicketRaw& ticket,
    std::memcpy(rights_id.data(), ticket.data() + 0x2A0, sizeof(Key128));

    if (rights_id == Key128{})
-        return boost::none;
+        return {};

    Key128 key_temp{};

@@ -356,17 +356,17 @@ boost::optional<std::pair<Key128, Key128>> ParseTicket(const TicketRaw& ticket,
    std::memcpy(m_2.data(), rsa_step.data() + 0x21, m_2.size());

    if (m_0 != 0)
-        return boost::none;
+        return {};

    m_1 = m_1 ^ MGF1<0x20>(m_2);
    m_2 = m_2 ^ MGF1<0xDF>(m_1);

    const auto offset = FindTicketOffset(m_2);
-    if (offset == boost::none)
-        return boost::none;
-    ASSERT(offset.get() > 0);
+    if (!offset)
+        return {};
+    ASSERT(*offset > 0);

-    std::memcpy(key_temp.data(), m_2.data() + offset.get(), key_temp.size());
+    std::memcpy(key_temp.data(), m_2.data() + *offset, key_temp.size());

    return std::make_pair(rights_id, key_temp);
 }
@@ -395,7 +395,7 @@ static bool ValidCryptoRevisionString(std::string_view base, size_t begin, size_
    if (base.size() < begin + length)
        return false;
    return std::all_of(base.begin() + begin, base.begin() + begin + length,
-                       [](u8 c) { return std::isdigit(c); });
+                       [](u8 c) { return std::isxdigit(c); });
 }

 void KeyManager::LoadFromFile(const std::string& filename, bool is_title_keys) {
@@ -661,8 +661,8 @@ void KeyManager::DeriveSDSeedLazy() {
        return;

    const auto res = DeriveSDSeed();
-    if (res != boost::none)
-        SetKey(S128KeyType::SDSeed, res.get());
+    if (res)
+        SetKey(S128KeyType::SDSeed, *res);
 }

 static Key128 CalculateCMAC(const u8* source, size_t size, const Key128& key) {
@@ -889,9 +889,9 @@ void KeyManager::DeriveETicket(PartitionDataManager& data) {

    for (const auto& raw : res) {
        const auto pair = ParseTicket(raw, rsa_key);
-        if (pair == boost::none)
+        if (!pair)
            continue;
-        const auto& [rid, key] = pair.value();
+        const auto& [rid, key] = *pair;
        u128 rights_id;
        std::memcpy(rights_id.data(), rid.data(), rid.size());
        SetKey(S128KeyType::Titlekey, key, rights_id[1], rights_id[0]);
--- a/src/core/crypto/key_manager.h
+++ b/src/core/crypto/key_manager.h
@@ -6,9 +6,10 @@

 #include <array>
 #include <map>
+#include <optional>
 #include <string>
+
 #include <boost/container/flat_map.hpp>
-#include <boost/optional.hpp>
 #include <fmt/format.h>
 #include "common/common_types.h"
 #include "core/crypto/partition_data_manager.h"
@@ -191,14 +192,14 @@ Key128 DeriveMasterKey(const std::array<u8, 0x90>& keyblob, const Key128& master
 std::array<u8, 0x90> DecryptKeyblob(const std::array<u8, 0xB0>& encrypted_keyblob,
                                    const Key128& key);

-boost::optional<Key128> DeriveSDSeed();
+std::optional<Key128> DeriveSDSeed();
 Loader::ResultStatus DeriveSDKeys(std::array<Key256, 2>& sd_keys, KeyManager& keys);

 std::vector<TicketRaw> GetTicketblob(const FileUtil::IOFile& ticket_save);

 // Returns a pair of {rights_id, titlekey}. Fails if the ticket has no certificate authority (offset
 // 0x140-0x144 is zero)
-boost::optional<std::pair<Key128, Key128>> ParseTicket(
-    const TicketRaw& ticket, const RSAKeyPair<2048>& eticket_extended_key);
+std::optional<std::pair<Key128, Key128>> ParseTicket(const TicketRaw& ticket,
+                                                     const RSAKeyPair<2048>& eticket_extended_key);

 } // namespace Core::Crypto
--- a/src/core/file_sys/content_archive.cpp
+++ b/src/core/file_sys/content_archive.cpp
@@ -4,10 +4,9 @@

 #include <algorithm>
 #include <cstring>
+#include <optional>
 #include <utility>

-#include <boost/optional.hpp>
-
 #include "common/logging/log.h"
 #include "core/crypto/aes_util.h"
 #include "core/crypto/ctr_encryption_layer.h"
@@ -306,18 +305,18 @@ bool NCA::ReadRomFSSection(const NCASectionHeader& section, const NCASectionTabl
        subsection_buckets.back().entries.push_back({section.bktr.relocation.offset, {0}, ctr_low});
        subsection_buckets.back().entries.push_back({size, {0}, 0});

-        boost::optional<Core::Crypto::Key128> key = boost::none;
+        std::optional<Core::Crypto::Key128> key = {};
        if (encrypted) {
            if (has_rights_id) {
                status = Loader::ResultStatus::Success;
                key = GetTitlekey();
-                if (key == boost::none) {
+                if (!key) {
                    status = Loader::ResultStatus::ErrorMissingTitlekey;
                    return false;
                }
            } else {
                key = GetKeyAreaKey(NCASectionCryptoType::BKTR);
-                if (key == boost::none) {
+                if (!key) {
                    status = Loader::ResultStatus::ErrorMissingKeyAreaKey;
                    return false;
                }
@@ -332,7 +331,7 @@ bool NCA::ReadRomFSSection(const NCASectionHeader& section, const NCASectionTabl
        auto bktr = std::make_shared<BKTR>(
            bktr_base_romfs, std::make_shared<OffsetVfsFile>(file, romfs_size, base_offset),
            relocation_block, relocation_buckets, subsection_block, subsection_buckets, encrypted,
-            encrypted ? key.get() : Core::Crypto::Key128{}, base_offset, bktr_base_ivfc_offset,
+            encrypted ? *key : Core::Crypto::Key128{}, base_offset, bktr_base_ivfc_offset,
            section.raw.section_ctr);

        // BKTR applies to entire IVFC, so make an offset version to level 6
@@ -388,11 +387,11 @@ u8 NCA::GetCryptoRevision() const {
    return master_key_id;
 }

-boost::optional<Core::Crypto::Key128> NCA::GetKeyAreaKey(NCASectionCryptoType type) const {
+std::optional<Core::Crypto::Key128> NCA::GetKeyAreaKey(NCASectionCryptoType type) const {
    const auto master_key_id = GetCryptoRevision();

    if (!keys.HasKey(Core::Crypto::S128KeyType::KeyArea, master_key_id, header.key_index))
-        return boost::none;
+        return {};

    std::vector<u8> key_area(header.key_area.begin(), header.key_area.end());
    Core::Crypto::AESCipher<Core::Crypto::Key128> cipher(
@@ -416,25 +415,25 @@ boost::optional<Core::Crypto::Key128> NCA::GetKeyAreaKey(NCASectionCryptoType ty
    return out;
 }

-boost::optional<Core::Crypto::Key128> NCA::GetTitlekey() {
+std::optional<Core::Crypto::Key128> NCA::GetTitlekey() {
    const auto master_key_id = GetCryptoRevision();

    u128 rights_id{};
    memcpy(rights_id.data(), header.rights_id.data(), 16);
    if (rights_id == u128{}) {
        status = Loader::ResultStatus::ErrorInvalidRightsID;
-        return boost::none;
+        return {};
    }

    auto titlekey = keys.GetKey(Core::Crypto::S128KeyType::Titlekey, rights_id[1], rights_id[0]);
    if (titlekey == Core::Crypto::Key128{}) {
        status = Loader::ResultStatus::ErrorMissingTitlekey;
-        return boost::none;
+        return {};
    }

    if (!keys.HasKey(Core::Crypto::S128KeyType::Titlekek, master_key_id)) {
        status = Loader::ResultStatus::ErrorMissingTitlekek;
-        return boost::none;
+        return {};
    }

    Core::Crypto::AESCipher<Core::Crypto::Key128> cipher(
@@ -458,25 +457,25 @@ VirtualFile NCA::Decrypt(const NCASectionHeader& s_header, VirtualFile in, u64 s
    case NCASectionCryptoType::BKTR:
        LOG_DEBUG(Crypto, "called with mode=CTR, starting_offset={:016X}", starting_offset);
        {
-            boost::optional<Core::Crypto::Key128> key = boost::none;
+            std::optional<Core::Crypto::Key128> key = {};
            if (has_rights_id) {
                status = Loader::ResultStatus::Success;
                key = GetTitlekey();
-                if (key == boost::none) {
+                if (!key) {
                    if (status == Loader::ResultStatus::Success)
                        status = Loader::ResultStatus::ErrorMissingTitlekey;
                    return nullptr;
                }
            } else {
                key = GetKeyAreaKey(NCASectionCryptoType::CTR);
-                if (key == boost::none) {
+                if (!key) {
                    status = Loader::ResultStatus::ErrorMissingKeyAreaKey;
                    return nullptr;
                }
            }

-            auto out = std::make_shared<Core::Crypto::CTREncryptionLayer>(
-                std::move(in), key.value(), starting_offset);
+            auto out = std::make_shared<Core::Crypto::CTREncryptionLayer>(std::move(in), *key,
+                                                                          starting_offset);
            std::vector<u8> iv(16);
            for (u8 i = 0; i < 8; ++i)
                iv[i] = s_header.raw.section_ctr[0x8 - i - 1];
--- a/src/core/file_sys/content_archive.h
+++ b/src/core/file_sys/content_archive.h
@@ -6,9 +6,10 @@

 #include <array>
 #include <memory>
+#include <optional>
 #include <string>
 #include <vector>
-#include <boost/optional.hpp>
+
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/swap.h"
@@ -111,8 +112,8 @@ private:
    bool ReadPFS0Section(const NCASectionHeader& section, const NCASectionTableEntry& entry);

    u8 GetCryptoRevision() const;
-    boost::optional<Core::Crypto::Key128> GetKeyAreaKey(NCASectionCryptoType type) const;
-    boost::optional<Core::Crypto::Key128> GetTitlekey();
+    std::optional<Core::Crypto::Key128> GetKeyAreaKey(NCASectionCryptoType type) const;
+    std::optional<Core::Crypto::Key128> GetTitlekey();
    VirtualFile Decrypt(const NCASectionHeader& header, VirtualFile in, u64 starting_offset);

    std::vector<VirtualDir> dirs;
--- a/src/core/file_sys/fsmitm_romfsbuild.h
+++ b/src/core/file_sys/fsmitm_romfsbuild.h
@@ -27,7 +27,6 @@
 #include <map>
 #include <memory>
 #include <string>
-#include <boost/detail/container_fwd.hpp>
 #include "common/common_types.h"
 #include "core/file_sys/vfs.h"

--- a/src/core/file_sys/ips_layer.cpp
+++ b/src/core/file_sys/ips_layer.cpp
@@ -103,12 +103,12 @@ VirtualFile PatchIPS(const VirtualFile& in, const VirtualFile& ips) {
            offset += sizeof(u16);

            const auto data = ips->ReadByte(offset++);
-            if (data == boost::none)
+            if (!data)
                return nullptr;

            if (real_offset + rle_size > in_data.size())
                rle_size = static_cast<u16>(in_data.size() - real_offset);
-            std::memset(in_data.data() + real_offset, data.get(), rle_size);
+            std::memset(in_data.data() + real_offset, *data, rle_size);
        } else { // Standard Patch
            auto read = data_size;
            if (real_offset + read > in_data.size())
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -61,13 +61,12 @@ VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const {
    // Game Updates
    const auto update_tid = GetUpdateTitleID(title_id);
    const auto update = installed->GetEntry(update_tid, ContentRecordType::Program);
-    if (update != nullptr) {
-        if (update->GetStatus() == Loader::ResultStatus::ErrorMissingBKTRBaseRomFS &&
-            update->GetExeFS() != nullptr) {
-            LOG_INFO(Loader, "    ExeFS: Update ({}) applied successfully",
-                     FormatTitleVersion(installed->GetEntryVersion(update_tid).get_value_or(0)));
-            exefs = update->GetExeFS();
-        }
+
+    if (update != nullptr && update->GetExeFS() != nullptr &&
+        update->GetStatus() == Loader::ResultStatus::ErrorMissingBKTRBaseRomFS) {
+        LOG_INFO(Loader, "    ExeFS: Update ({}) applied successfully",
+                 FormatTitleVersion(installed->GetEntryVersion(update_tid).value_or(0)));
+        exefs = update->GetExeFS();
    }

    return exefs;
@@ -237,7 +236,7 @@ VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset, Content
        if (new_nca->GetStatus() == Loader::ResultStatus::Success &&
            new_nca->GetRomFS() != nullptr) {
            LOG_INFO(Loader, "    RomFS: Update ({}) applied successfully",
-                     FormatTitleVersion(installed->GetEntryVersion(update_tid).get_value_or(0)));
+                     FormatTitleVersion(installed->GetEntryVersion(update_tid).value_or(0)));
            romfs = new_nca->GetRomFS();
        }
    } else if (update_raw != nullptr) {
@@ -281,12 +280,11 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
    } else {
        if (installed->HasEntry(update_tid, ContentRecordType::Program)) {
            const auto meta_ver = installed->GetEntryVersion(update_tid);
-            if (meta_ver == boost::none || meta_ver.get() == 0) {
+            if (meta_ver.value_or(0) == 0) {
                out.insert_or_assign("Update", "");
            } else {
                out.insert_or_assign(
-                    "Update",
-                    FormatTitleVersion(meta_ver.get(), TitleVersionFormat::ThreeElements));
+                    "Update", FormatTitleVersion(*meta_ver, TitleVersionFormat::ThreeElements));
            }
        } else if (update_raw != nullptr) {
            out.insert_or_assign("Update", "PACKED");
--- a/src/core/file_sys/registered_cache.cpp
+++ b/src/core/file_sys/registered_cache.cpp
@@ -159,28 +159,28 @@ VirtualFile RegisteredCache::GetFileAtID(NcaID id) const {
    return file;
 }

-static boost::optional<NcaID> CheckMapForContentRecord(
+static std::optional<NcaID> CheckMapForContentRecord(
    const boost::container::flat_map<u64, CNMT>& map, u64 title_id, ContentRecordType type) {
    if (map.find(title_id) == map.end())
-        return boost::none;
+        return {};

    const auto& cnmt = map.at(title_id);

    const auto iter = std::find_if(cnmt.GetContentRecords().begin(), cnmt.GetContentRecords().end(),
                                   [type](const ContentRecord& rec) { return rec.type == type; });
    if (iter == cnmt.GetContentRecords().end())
-        return boost::none;
+        return {};

-    return boost::make_optional(iter->nca_id);
+    return std::make_optional(iter->nca_id);
 }

-boost::optional<NcaID> RegisteredCache::GetNcaIDFromMetadata(u64 title_id,
-                                                             ContentRecordType type) const {
+std::optional<NcaID> RegisteredCache::GetNcaIDFromMetadata(u64 title_id,
+                                                           ContentRecordType type) const {
    if (type == ContentRecordType::Meta && meta_id.find(title_id) != meta_id.end())
        return meta_id.at(title_id);

    const auto res1 = CheckMapForContentRecord(yuzu_meta, title_id, type);
-    if (res1 != boost::none)
+    if (res1)
        return res1;
    return CheckMapForContentRecord(meta, title_id, type);
 }
@@ -283,17 +283,14 @@ bool RegisteredCache::HasEntry(RegisteredCacheEntry entry) const {

 VirtualFile RegisteredCache::GetEntryUnparsed(u64 title_id, ContentRecordType type) const {
    const auto id = GetNcaIDFromMetadata(title_id, type);
-    if (id == boost::none)
-        return nullptr;
-
-    return GetFileAtID(id.get());
+    return id ? GetFileAtID(*id) : nullptr;
 }

 VirtualFile RegisteredCache::GetEntryUnparsed(RegisteredCacheEntry entry) const {
    return GetEntryUnparsed(entry.title_id, entry.type);
 }

-boost::optional<u32> RegisteredCache::GetEntryVersion(u64 title_id) const {
+std::optional<u32> RegisteredCache::GetEntryVersion(u64 title_id) const {
    const auto meta_iter = meta.find(title_id);
    if (meta_iter != meta.end())
        return meta_iter->second.GetTitleVersion();
@@ -302,15 +299,12 @@ boost::optional<u32> RegisteredCache::GetEntryVersion(u64 title_id) const {
    if (yuzu_meta_iter != yuzu_meta.end())
        return yuzu_meta_iter->second.GetTitleVersion();

-    return boost::none;
+    return {};
 }

 VirtualFile RegisteredCache::GetEntryRaw(u64 title_id, ContentRecordType type) const {
    const auto id = GetNcaIDFromMetadata(title_id, type);
-    if (id == boost::none)
-        return nullptr;
-
-    return parser(GetFileAtID(id.get()), id.get());
+    return id ? parser(GetFileAtID(*id), *id) : nullptr;
 }

 VirtualFile RegisteredCache::GetEntryRaw(RegisteredCacheEntry entry) const {
@@ -364,8 +358,8 @@ std::vector<RegisteredCacheEntry> RegisteredCache::ListEntries() const {
 }

 std::vector<RegisteredCacheEntry> RegisteredCache::ListEntriesFilter(
-    boost::optional<TitleType> title_type, boost::optional<ContentRecordType> record_type,
-    boost::optional<u64> title_id) const {
+    std::optional<TitleType> title_type, std::optional<ContentRecordType> record_type,
+    std::optional<u64> title_id) const {
    std::vector<RegisteredCacheEntry> out;
    IterateAllMetadata<RegisteredCacheEntry>(
        out,
@@ -373,11 +367,11 @@ std::vector<RegisteredCacheEntry> RegisteredCache::ListEntriesFilter(
            return RegisteredCacheEntry{c.GetTitleID(), r.type};
        },
        [&title_type, &record_type, &title_id](const CNMT& c, const ContentRecord& r) {
-            if (title_type != boost::none && title_type.get() != c.GetType())
+            if (title_type && *title_type != c.GetType())
                return false;
-            if (record_type != boost::none && record_type.get() != r.type)
+            if (record_type && *record_type != r.type)
                return false;
-            if (title_id != boost::none && title_id.get() != c.GetTitleID())
+            if (title_id && *title_id != c.GetTitleID())
                return false;
            return true;
        });
@@ -459,7 +453,7 @@ InstallResult RegisteredCache::InstallEntry(std::shared_ptr<NCA> nca, TitleType

 InstallResult RegisteredCache::RawInstallNCA(std::shared_ptr<NCA> nca, const VfsCopyFunction& copy,
                                             bool overwrite_if_exists,
-                                             boost::optional<NcaID> override_id) {
+                                             std::optional<NcaID> override_id) {
    const auto in = nca->GetBaseFile();
    Core::Crypto::SHA256Hash hash{};

@@ -468,12 +462,12 @@ InstallResult RegisteredCache::RawInstallNCA(std::shared_ptr<NCA> nca, const Vfs
    // game is massive), we're going to cheat and only hash the first MB of the NCA.
    // Also, for XCIs the NcaID matters, so if the override id isn't none, use that.
    NcaID id{};
-    if (override_id == boost::none) {
+    if (override_id) {
+        id = *override_id;
+    } else {
        const auto& data = in->ReadBytes(0x100000);
        mbedtls_sha256(data.data(), data.size(), hash.data(), 0);
        memcpy(id.data(), hash.data(), 16);
-    } else {
-        id = override_id.get();
    }

    std::string path = GetRelativePathFromNcaID(id, false, true);
@@ -543,14 +537,14 @@ bool RegisteredCacheUnion::HasEntry(RegisteredCacheEntry entry) const {
    return HasEntry(entry.title_id, entry.type);
 }

-boost::optional<u32> RegisteredCacheUnion::GetEntryVersion(u64 title_id) const {
+std::optional<u32> RegisteredCacheUnion::GetEntryVersion(u64 title_id) const {
    for (const auto& c : caches) {
        const auto res = c->GetEntryVersion(title_id);
-        if (res != boost::none)
+        if (res)
            return res;
    }

-    return boost::none;
+    return {};
 }

 VirtualFile RegisteredCacheUnion::GetEntryUnparsed(u64 title_id, ContentRecordType type) const {
@@ -609,8 +603,8 @@ std::vector<RegisteredCacheEntry> RegisteredCacheUnion::ListEntries() const {
 }

 std::vector<RegisteredCacheEntry> RegisteredCacheUnion::ListEntriesFilter(
-    boost::optional<TitleType> title_type, boost::optional<ContentRecordType> record_type,
-    boost::optional<u64> title_id) const {
+    std::optional<TitleType> title_type, std::optional<ContentRecordType> record_type,
+    std::optional<u64> title_id) const {
    std::vector<RegisteredCacheEntry> out;
    for (const auto& c : caches) {
        c->IterateAllMetadata<RegisteredCacheEntry>(
@@ -619,11 +613,11 @@ std::vector<RegisteredCacheEntry> RegisteredCacheUnion::ListEntriesFilter(
                return RegisteredCacheEntry{c.GetTitleID(), r.type};
            },
            [&title_type, &record_type, &title_id](const CNMT& c, const ContentRecord& r) {
-                if (title_type != boost::none && title_type.get() != c.GetType())
+                if (title_type && *title_type != c.GetType())
                    return false;
-                if (record_type != boost::none && record_type.get() != r.type)
+                if (record_type && *record_type != r.type)
                    return false;
-                if (title_id != boost::none && title_id.get() != c.GetTitleID())
+                if (title_id && *title_id != c.GetTitleID())
                    return false;
                return true;
            });
--- a/src/core/file_sys/registered_cache.h
+++ b/src/core/file_sys/registered_cache.h
@@ -84,7 +84,7 @@ public:
    bool HasEntry(u64 title_id, ContentRecordType type) const;
    bool HasEntry(RegisteredCacheEntry entry) const;

-    boost::optional<u32> GetEntryVersion(u64 title_id) const;
+    std::optional<u32> GetEntryVersion(u64 title_id) const;

    VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const;
    VirtualFile GetEntryUnparsed(RegisteredCacheEntry entry) const;
@@ -96,11 +96,10 @@ public:
    std::unique_ptr<NCA> GetEntry(RegisteredCacheEntry entry) const;

    std::vector<RegisteredCacheEntry> ListEntries() const;
-    // If a parameter is not boost::none, it will be filtered for from all entries.
+    // If a parameter is not std::nullopt, it will be filtered for from all entries.
    std::vector<RegisteredCacheEntry> ListEntriesFilter(
-        boost::optional<TitleType> title_type = boost::none,
-        boost::optional<ContentRecordType> record_type = boost::none,
-        boost::optional<u64> title_id = boost::none) const;
+        std::optional<TitleType> title_type = {}, std::optional<ContentRecordType> record_type = {},
+        std::optional<u64> title_id = {}) const;

    // Raw copies all the ncas from the xci/nsp to the csache. Does some quick checks to make sure
    // there is a meta NCA and all of them are accessible.
@@ -125,12 +124,11 @@ private:
    std::vector<NcaID> AccumulateFiles() const;
    void ProcessFiles(const std::vector<NcaID>& ids);
    void AccumulateYuzuMeta();
-    boost::optional<NcaID> GetNcaIDFromMetadata(u64 title_id, ContentRecordType type) const;
+    std::optional<NcaID> GetNcaIDFromMetadata(u64 title_id, ContentRecordType type) const;
    VirtualFile GetFileAtID(NcaID id) const;
    VirtualFile OpenFileOrDirectoryConcat(const VirtualDir& dir, std::string_view path) const;
    InstallResult RawInstallNCA(std::shared_ptr<NCA> nca, const VfsCopyFunction& copy,
-                                bool overwrite_if_exists,
-                                boost::optional<NcaID> override_id = boost::none);
+                                bool overwrite_if_exists, std::optional<NcaID> override_id = {});
    bool RawInstallYuzuMeta(const CNMT& cnmt);

    VirtualDir dir;
@@ -153,7 +151,7 @@ public:
    bool HasEntry(u64 title_id, ContentRecordType type) const;
    bool HasEntry(RegisteredCacheEntry entry) const;

-    boost::optional<u32> GetEntryVersion(u64 title_id) const;
+    std::optional<u32> GetEntryVersion(u64 title_id) const;

    VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const;
    VirtualFile GetEntryUnparsed(RegisteredCacheEntry entry) const;
@@ -165,11 +163,10 @@ public:
    std::unique_ptr<NCA> GetEntry(RegisteredCacheEntry entry) const;

    std::vector<RegisteredCacheEntry> ListEntries() const;
-    // If a parameter is not boost::none, it will be filtered for from all entries.
+    // If a parameter is not std::nullopt, it will be filtered for from all entries.
    std::vector<RegisteredCacheEntry> ListEntriesFilter(
-        boost::optional<TitleType> title_type = boost::none,
-        boost::optional<ContentRecordType> record_type = boost::none,
-        boost::optional<u64> title_id = boost::none) const;
+        std::optional<TitleType> title_type = {}, std::optional<ContentRecordType> record_type = {},
+        std::optional<u64> title_id = {}) const;

 private:
    std::vector<RegisteredCache*> caches;
--- a/src/core/file_sys/vfs.cpp
+++ b/src/core/file_sys/vfs.cpp
@@ -167,13 +167,13 @@ std::string VfsFile::GetExtension() const {

 VfsDirectory::~VfsDirectory() = default;

-boost::optional<u8> VfsFile::ReadByte(std::size_t offset) const {
+std::optional<u8> VfsFile::ReadByte(std::size_t offset) const {
    u8 out{};
    std::size_t size = Read(&out, 1, offset);
    if (size == 1)
        return out;

-    return boost::none;
+    return {};
 }

 std::vector<u8> VfsFile::ReadBytes(std::size_t size, std::size_t offset) const {
--- a/src/core/file_sys/vfs.h
+++ b/src/core/file_sys/vfs.h
@@ -4,13 +4,15 @@

 #pragma once

+#include <functional>
 #include <map>
 #include <memory>
+#include <optional>
 #include <string>
 #include <string_view>
 #include <type_traits>
 #include <vector>
-#include <boost/optional.hpp>
+
 #include "common/common_types.h"
 #include "core/file_sys/vfs_types.h"

@@ -103,8 +105,8 @@ public:
    // into file. Returns number of bytes successfully written.
    virtual std::size_t Write(const u8* data, std::size_t length, std::size_t offset = 0) = 0;

-    // Reads exactly one byte at the offset provided, returning boost::none on error.
-    virtual boost::optional<u8> ReadByte(std::size_t offset = 0) const;
+    // Reads exactly one byte at the offset provided, returning std::nullopt on error.
+    virtual std::optional<u8> ReadByte(std::size_t offset = 0) const;
    // Reads size bytes starting at offset in file into a vector.
    virtual std::vector<u8> ReadBytes(std::size_t size, std::size_t offset = 0) const;
    // Reads all the bytes from the file into a vector. Equivalent to 'file->Read(file->GetSize(),
--- a/src/core/file_sys/vfs_offset.cpp
+++ b/src/core/file_sys/vfs_offset.cpp
@@ -57,11 +57,11 @@ std::size_t OffsetVfsFile::Write(const u8* data, std::size_t length, std::size_t
    return file->Write(data, TrimToFit(length, r_offset), offset + r_offset);
 }

-boost::optional<u8> OffsetVfsFile::ReadByte(std::size_t r_offset) const {
+std::optional<u8> OffsetVfsFile::ReadByte(std::size_t r_offset) const {
    if (r_offset < size)
        return file->ReadByte(offset + r_offset);

-    return boost::none;
+    return {};
 }

 std::vector<u8> OffsetVfsFile::ReadBytes(std::size_t r_size, std::size_t r_offset) const {
--- a/src/core/file_sys/vfs_offset.h
+++ b/src/core/file_sys/vfs_offset.h
@@ -29,7 +29,7 @@ public:
    bool IsReadable() const override;
    std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override;
    std::size_t Write(const u8* data, std::size_t length, std::size_t offset) override;
-    boost::optional<u8> ReadByte(std::size_t offset) const override;
+    std::optional<u8> ReadByte(std::size_t offset) const override;
    std::vector<u8> ReadBytes(std::size_t size, std::size_t offset) const override;
    std::vector<u8> ReadAllBytes() const override;
    bool WriteByte(u8 data, std::size_t offset) override;
--- a/src/core/file_sys/vfs_static.h
+++ b/src/core/file_sys/vfs_static.h
@@ -53,10 +53,10 @@ public:
        return 0;
    }

-    boost::optional<u8> ReadByte(std::size_t offset) const override {
+    std::optional<u8> ReadByte(std::size_t offset) const override {
        if (offset < size)
            return value;
-        return boost::none;
+        return {};
    }

    std::vector<u8> ReadBytes(std::size_t length, std::size_t offset) const override {
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -117,8 +117,7 @@ public:

        AlignWithPadding();

-        const bool request_has_domain_header{context.GetDomainMessageHeader() != nullptr};
-        if (context.Session()->IsDomain() && request_has_domain_header) {
+        if (context.Session()->IsDomain() && context.HasDomainMessageHeader()) {
            IPC::DomainMessageHeader domain_header{};
            domain_header.num_objects = num_domain_objects;
            PushRaw(domain_header);
--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -161,8 +161,12 @@ public:
        return buffer_c_desciptors;
    }

-    const std::shared_ptr<IPC::DomainMessageHeader>& GetDomainMessageHeader() const {
-        return domain_message_header;
+    const IPC::DomainMessageHeader* GetDomainMessageHeader() const {
+        return domain_message_header.get();
+    }
+
+    bool HasDomainMessageHeader() const {
+        return domain_message_header != nullptr;
    }

    /// Helper function to read a buffer using the appropriate buffer descriptor
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -32,7 +32,7 @@ namespace Kernel {
 */
 static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_late) {
    const auto proper_handle = static_cast<Handle>(thread_handle);
-    auto& system = Core::System::GetInstance();
+    const auto& system = Core::System::GetInstance();

    // Lock the global kernel mutex when we enter the kernel HLE.
    std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
@@ -90,7 +90,7 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_
 /// The timer callback event, called when a timer is fired
 static void TimerCallback(u64 timer_handle, int cycles_late) {
    const auto proper_handle = static_cast<Handle>(timer_handle);
-    auto& system = Core::System::GetInstance();
+    const auto& system = Core::System::GetInstance();
    SharedPtr<Timer> timer = system.Kernel().RetrieveTimerFromCallbackHandleTable(proper_handle);

    if (timer == nullptr) {
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -6,8 +6,6 @@
 #include <utility>
 #include <vector>

-#include <boost/range/algorithm_ext/erase.hpp>
-
 #include "common/assert.h"
 #include "core/core.h"
 #include "core/hle/kernel/errors.h"
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -202,6 +202,16 @@ public:
        return is_64bit_process;
    }

+    /// Gets the total running time of the process instance in ticks.
+    u64 GetCPUTimeTicks() const {
+        return total_process_running_time_ticks;
+    }
+
+    /// Updates the total running time, adding the given ticks to it.
+    void UpdateCPUTimeTicks(u64 ticks) {
+        total_process_running_time_ticks += ticks;
+    }
+
    /**
     * Loads process-specifics configuration info with metadata provided
     * by an executable.
@@ -305,6 +315,9 @@ private:
    /// specified by metadata provided to the process during loading.
    bool is_64bit_process = true;

+    /// Total running time for the process in ticks.
+    u64 total_process_running_time_ticks = 0;
+
    /// Per-process handle table for storing created object handles in.
    HandleTable handle_table;

--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -9,6 +9,7 @@
 #include "common/logging/log.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
+#include "core/core_timing.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/scheduler.h"
@@ -34,6 +35,10 @@ Thread* Scheduler::GetCurrentThread() const {
    return current_thread.get();
 }

+u64 Scheduler::GetLastContextSwitchTicks() const {
+    return last_context_switch_time;
+}
+
 Thread* Scheduler::PopNextReadyThread() {
    Thread* next = nullptr;
    Thread* thread = GetCurrentThread();
@@ -54,7 +59,10 @@ Thread* Scheduler::PopNextReadyThread() {
 }

 void Scheduler::SwitchContext(Thread* new_thread) {
-    Thread* previous_thread = GetCurrentThread();
+    Thread* const previous_thread = GetCurrentThread();
+    Process* const previous_process = Core::CurrentProcess();
+
+    UpdateLastContextSwitchTime(previous_thread, previous_process);

    // Save context for previous thread
    if (previous_thread) {
@@ -78,8 +86,6 @@ void Scheduler::SwitchContext(Thread* new_thread) {
        // Cancel any outstanding wakeup events for this thread
        new_thread->CancelWakeupTimer();

-        auto* const previous_process = Core::CurrentProcess();
-
        current_thread = new_thread;

        ready_queue.remove(new_thread->GetPriority(), new_thread);
@@ -102,6 +108,22 @@ void Scheduler::SwitchContext(Thread* new_thread) {
    }
 }

+void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
+    // Ticks elapsed since the previously recorded context switch.
+    const u64 now_ticks = CoreTiming::GetTicks();
+    const u64 elapsed_ticks = now_ticks - last_context_switch_time;
+
+    // Credit the elapsed ticks to whichever of the two is non-null.
+    if (thread != nullptr) {
+        thread->UpdateCPUTimeTicks(elapsed_ticks);
+    }
+    if (process != nullptr) {
+        process->UpdateCPUTimeTicks(elapsed_ticks);
+    }
+
+    last_context_switch_time = now_ticks;
+}
+
 void Scheduler::Reschedule() {
    std::lock_guard<std::mutex> lock(scheduler_mutex);

--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -17,6 +17,8 @@ class ARM_Interface;

 namespace Kernel {

+class Process;
+
 class Scheduler final {
 public:
    explicit Scheduler(Core::ARM_Interface& cpu_core);
@@ -31,6 +33,9 @@ public:
    /// Gets the current running thread
    Thread* GetCurrentThread() const;

+    /// Gets the timestamp for the last context switch in ticks.
+    u64 GetLastContextSwitchTicks() const;
+
    /// Adds a new thread to the scheduler
    void AddThread(SharedPtr<Thread> thread, u32 priority);

@@ -64,6 +69,19 @@ private:
     */
    void SwitchContext(Thread* new_thread);

+    /**
+     * Called on every context switch to update the internal timestamp
+     * This also updates the running time ticks for the given thread and
+     * process using the following difference:
+     *
+     * ticks += most_recent_ticks - last_context_switch_ticks
+     *
+     * The internal tick timestamp for the scheduler is simply the
+     * most recent tick count retrieved. No special arithmetic is
+     * applied to it.
+     */
+    void UpdateLastContextSwitchTime(Thread* thread, Process* process);
+
    /// Lists all thread ids that aren't deleted/etc.
    std::vector<SharedPtr<Thread>> thread_list;

@@ -73,6 +91,7 @@ private:
    SharedPtr<Thread> current_thread = nullptr;

    Core::ARM_Interface& cpu_core;
+    u64 last_context_switch_time = 0;

    static std::mutex scheduler_mutex;
 };
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -63,7 +63,7 @@ void ServerSession::Acquire(Thread* thread) {
 }

 ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) {
-    auto& domain_message_header = context.GetDomainMessageHeader();
+    auto* const domain_message_header = context.GetDomainMessageHeader();
    if (domain_message_header) {
        // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs
        context.SetDomainRequestHandlers(domain_request_handlers);
@@ -111,7 +111,7 @@ ResultCode ServerSession::HandleSyncRequest(SharedPtr<Thread> thread) {

    ResultCode result = RESULT_SUCCESS;
    // If the session has been converted to a domain, handle the domain request
-    if (IsDomain() && context.GetDomainMessageHeader()) {
+    if (IsDomain() && context.HasDomainMessageHeader()) {
        result = HandleDomainSyncRequest(context);
        // If there is no domain header, the regular session handler is used
    } else if (hle_handler != nullptr) {
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -34,6 +34,7 @@
 #include "core/hle/lock.h"
 #include "core/hle/result.h"
 #include "core/hle/service/service.h"
+#include "core/settings.h"

 namespace Kernel {
 namespace {
@@ -395,16 +396,42 @@ struct BreakReason {
 /// Break program execution
 static void Break(u32 reason, u64 info1, u64 info2) {
    BreakReason break_reason{reason};
+    bool has_dumped_buffer{};

+    // Dumps the guest-supplied debug buffer once per Break call; later calls are no-ops.
+    const auto handle_debug_buffer = [&](VAddr addr, u64 sz) {
+        if (sz == 0 || addr == 0 || has_dumped_buffer) {
+            return;
+        }
+        // This typically is an error code so we're going to assume this is the case
+        if (sz == sizeof(u32)) {
+            LOG_CRITICAL(Debug_Emulated, "debug_buffer_err_code={:X}", Memory::Read32(addr));
+        } else {
+            // We don't know what's in here so we'll hexdump it, 16 bytes per row
+            std::vector<u8> debug_buffer(sz);
+            Memory::ReadBlock(addr, debug_buffer.data(), sz);
+            std::string hexdump;
+            for (std::size_t i = 0; i < debug_buffer.size(); i++) {
+                hexdump += fmt::format("{:02X} ", debug_buffer[i]);
+                if ((i + 1) % 16 == 0) { // i is zero-based: break after every 16th byte
+                    hexdump += '\n';
+                }
+            }
+            LOG_CRITICAL(Debug_Emulated, "debug_buffer=\n{}", hexdump);
+        }
+        has_dumped_buffer = true;
+    };
    switch (break_reason.break_type) {
    case BreakType::Panic:
        LOG_CRITICAL(Debug_Emulated, "Signalling debugger, PANIC! info1=0x{:016X}, info2=0x{:016X}",
                     info1, info2);
+        handle_debug_buffer(info1, info2);
        break;
    case BreakType::AssertionFailed:
        LOG_CRITICAL(Debug_Emulated,
                     "Signalling debugger, Assertion failed! info1=0x{:016X}, info2=0x{:016X}",
                     info1, info2);
+        handle_debug_buffer(info1, info2);
        break;
    case BreakType::PreNROLoad:
        LOG_WARNING(
@@ -433,6 +460,7 @@ static void Break(u32 reason, u64 info1, u64 info2) {
            Debug_Emulated,
            "Signalling debugger, Unknown break reason {}, info1=0x{:016X}, info2=0x{:016X}",
            static_cast<u32>(break_reason.break_type.Value()), info1, info2);
+        handle_debug_buffer(info1, info2);
        break;
    }

@@ -441,6 +469,7 @@ static void Break(u32 reason, u64 info1, u64 info2) {
            Debug_Emulated,
            "Emulated program broke execution! reason=0x{:016X}, info1=0x{:016X}, info2=0x{:016X}",
            reason, info1, info2);
+        handle_debug_buffer(info1, info2);
        ASSERT(false);

        Core::CurrentProcess()->PrepareForTermination();
@@ -467,6 +496,37 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
    LOG_TRACE(Kernel_SVC, "called info_id=0x{:X}, info_sub_id=0x{:X}, handle=0x{:08X}", info_id,
              info_sub_id, handle);

+    enum class GetInfoType : u64 { // Values accepted by svcGetInfo
+        // 1.0.0+
+        AllowedCpuIdBitmask = 0,
+        AllowedThreadPrioBitmask = 1,
+        MapRegionBaseAddr = 2,
+        MapRegionSize = 3,
+        HeapRegionBaseAddr = 4,
+        HeapRegionSize = 5,
+        TotalMemoryUsage = 6,
+        TotalHeapUsage = 7,
+        IsCurrentProcessBeingDebugged = 8,
+        ResourceHandleLimit = 9,
+        IdleTickCount = 10,
+        RandomEntropy = 11,
+        PerformanceCounter = 0xF0000002, // NOTE(review): same value as ThreadTickCount — confirm
+        // 2.0.0+
+        ASLRRegionBaseAddr = 12,
+        ASLRRegionSize = 13,
+        NewMapRegionBaseAddr = 14,
+        NewMapRegionSize = 15,
+        // 3.0.0+
+        IsVirtualAddressMemoryEnabled = 16,
+        PersonalMmHeapUsage = 17,
+        TitleId = 18,
+        // 4.0.0+
+        PrivilegedProcessId = 19,
+        // 5.0.0+
+        UserExceptionContextAddr = 20,
+        ThreadTickCount = 0xF0000002, // NOTE(review): duplicates PerformanceCounter's value
+    };
+
    const auto* current_process = Core::CurrentProcess();
    const auto& vm_manager = current_process->VMManager();

@@ -499,7 +559,7 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
        *result = 0;
        break;
    case GetInfoType::RandomEntropy:
-        *result = 0;
+        *result = Settings::values.rng_seed.value_or(0);
        break;
    case GetInfoType::ASLRRegionBaseAddr:
        *result = vm_manager.GetASLRRegionBaseAddress();
@@ -529,6 +589,36 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
                    "(STUBBED) Attempted to query user exception context address, returned 0");
        *result = 0;
        break;
+    case GetInfoType::ThreadTickCount: {
+        constexpr u64 num_cpus = 4;
+        if (info_sub_id != 0xFFFFFFFFFFFFFFFF && info_sub_id >= num_cpus) {
+            return ERR_INVALID_COMBINATION_KERNEL;
+        }
+
+        const auto thread =
+            current_process->GetHandleTable().Get<Thread>(static_cast<Handle>(handle));
+        if (!thread) {
+            return ERR_INVALID_HANDLE;
+        }
+
+        const auto& system = Core::System::GetInstance();
+        const auto& scheduler = system.CurrentScheduler();
+        const auto* const current_thread = scheduler.GetCurrentThread();
+        const bool same_thread = current_thread == thread;
+
+        const u64 prev_ctx_ticks = scheduler.GetLastContextSwitchTicks();
+        u64 out_ticks = 0;
+        if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) {
+            const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks();
+
+            out_ticks = thread_ticks + (CoreTiming::GetTicks() - prev_ctx_ticks);
+        } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) {
+            out_ticks = CoreTiming::GetTicks() - prev_ctx_ticks;
+        }
+
+        *result = out_ticks;
+        break;
+    }
    default:
        UNIMPLEMENTED();
    }
--- a/src/core/hle/kernel/svc.h
+++ b/src/core/hle/kernel/svc.h
@@ -24,37 +24,6 @@ struct PageInfo {
    u64 flags;
 };

-/// Values accepted by svcGetInfo
-enum class GetInfoType : u64 {
-    // 1.0.0+
-    AllowedCpuIdBitmask = 0,
-    AllowedThreadPrioBitmask = 1,
-    MapRegionBaseAddr = 2,
-    MapRegionSize = 3,
-    HeapRegionBaseAddr = 4,
-    HeapRegionSize = 5,
-    TotalMemoryUsage = 6,
-    TotalHeapUsage = 7,
-    IsCurrentProcessBeingDebugged = 8,
-    ResourceHandleLimit = 9,
-    IdleTickCount = 10,
-    RandomEntropy = 11,
-    PerformanceCounter = 0xF0000002,
-    // 2.0.0+
-    ASLRRegionBaseAddr = 12,
-    ASLRRegionSize = 13,
-    NewMapRegionBaseAddr = 14,
-    NewMapRegionSize = 15,
-    // 3.0.0+
-    IsVirtualAddressMemoryEnabled = 16,
-    PersonalMmHeapUsage = 17,
-    TitleId = 18,
-    // 4.0.0+
-    PrivilegedProcessId = 19,
-    // 5.0.0+
-    UserExceptionContextAddr = 20,
-};
-
 void CallSVC(u32 immediate);

 } // namespace Kernel
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -4,9 +4,9 @@

 #include <algorithm>
 #include <cinttypes>
+#include <optional>
 #include <vector>

-#include <boost/optional.hpp>
 #include <boost/range/algorithm_ext/erase.hpp>

 #include "common/assert.h"
@@ -94,7 +94,7 @@ void Thread::CancelWakeupTimer() {
    CoreTiming::UnscheduleEventThreadsafe(kernel.ThreadWakeupCallbackEventType(), callback_handle);
 }

-static boost::optional<s32> GetNextProcessorId(u64 mask) {
+static std::optional<s32> GetNextProcessorId(u64 mask) {
    for (s32 index = 0; index < Core::NUM_CPU_CORES; ++index) {
        if (mask & (1ULL << index)) {
            if (!Core::System::GetInstance().Scheduler(index).GetCurrentThread()) {
@@ -142,7 +142,7 @@ void Thread::ResumeFromWait() {

    status = ThreadStatus::Ready;

-    boost::optional<s32> new_processor_id = GetNextProcessorId(affinity_mask);
+    std::optional<s32> new_processor_id = GetNextProcessorId(affinity_mask);
    if (!new_processor_id) {
        new_processor_id = processor_id;
    }
@@ -369,7 +369,7 @@ void Thread::ChangeCore(u32 core, u64 mask) {
        return;
    }

-    boost::optional<s32> new_processor_id{GetNextProcessorId(affinity_mask)};
+    std::optional<s32> new_processor_id{GetNextProcessorId(affinity_mask)};

    if (!new_processor_id) {
        new_processor_id = processor_id;
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -258,6 +258,14 @@ public:
        return last_running_ticks;
    }

+    u64 GetTotalCPUTimeTicks() const {
+        return total_cpu_time_ticks;
+    }
+
+    void UpdateCPUTimeTicks(u64 ticks) {
+        total_cpu_time_ticks += ticks;
+    }
+
    s32 GetProcessorID() const {
        return processor_id;
    }
@@ -378,7 +386,8 @@ private:
    u32 nominal_priority = 0; ///< Nominal thread priority, as set by the emulated application
    u32 current_priority = 0; ///< Current thread priority, can be temporarily changed

-    u64 last_running_ticks = 0; ///< CPU tick when thread was last running
+    u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks.
+    u64 last_running_ticks = 0;   ///< CPU tick when thread was last running

    s32 processor_id = 0;

--- a/src/core/hle/service/acc/acc.cpp
+++ b/src/core/hle/service/acc/acc.cpp
@@ -242,6 +242,28 @@ void Module::Interface::GetBaasAccountManagerForApplication(Kernel::HLERequestCo
    LOG_DEBUG(Service_ACC, "called");
 }

+void Module::Interface::TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_ACC, "called");
+    // A u8 is passed into this function which we can safely ignore. It's to determine if we have
+    // access to use the network or not by the looks of it
+    IPC::ResponseBuilder rb{ctx, 6};
+    if (profile_manager->GetUserCount() != 1) { // zero or several users: nothing to auto-select
+        rb.Push(RESULT_SUCCESS);
+        rb.PushRaw<u128>(INVALID_UUID);
+        return;
+    }
+    auto user_list = profile_manager->GetAllUsers(); // only consulted for the empty() check
+    if (user_list.empty()) {
+        rb.Push(ResultCode(-1)); // TODO(ogniK): Find the correct error code
+        rb.PushRaw<u128>(INVALID_UUID);
+        return;
+    }
+
+    // Exactly one user exists: reply with user 0's uuid (NOTE(review): GetUser(0) is unchecked)
+    rb.Push(RESULT_SUCCESS);
+    rb.PushRaw<u128>(profile_manager->GetUser(0)->uuid);
+}
+
 Module::Interface::Interface(std::shared_ptr<Module> module,
                             std::shared_ptr<ProfileManager> profile_manager, const char* name)
    : ServiceFramework(name), module(std::move(module)),
--- a/src/core/hle/service/acc/acc.h
+++ b/src/core/hle/service/acc/acc.h
@@ -27,6 +27,7 @@ public:
        void InitializeApplicationInfo(Kernel::HLERequestContext& ctx);
        void GetBaasAccountManagerForApplication(Kernel::HLERequestContext& ctx);
        void IsUserRegistrationRequestPermitted(Kernel::HLERequestContext& ctx);
+        void TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx);

    protected:
        std::shared_ptr<Module> module;
--- a/src/core/hle/service/acc/acc_su.cpp
+++ b/src/core/hle/service/acc/acc_su.cpp
@@ -17,7 +17,7 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
        {5, &ACC_SU::GetProfile, "GetProfile"},
        {6, nullptr, "GetProfileDigest"},
        {50, &ACC_SU::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
-        {51, nullptr, "TrySelectUserWithoutInteraction"},
+        {51, &ACC_SU::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
        {60, nullptr, "ListOpenContextStoredUsers"},
        {100, nullptr, "GetUserRegistrationNotifier"},
        {101, nullptr, "GetUserStateChangeNotifier"},
--- a/src/core/hle/service/acc/acc_u0.cpp
+++ b/src/core/hle/service/acc/acc_u0.cpp
@@ -17,7 +17,7 @@ ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
        {5, &ACC_U0::GetProfile, "GetProfile"},
        {6, nullptr, "GetProfileDigest"},
        {50, &ACC_U0::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
-        {51, nullptr, "TrySelectUserWithoutInteraction"},
+        {51, &ACC_U0::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
        {60, nullptr, "ListOpenContextStoredUsers"},
        {100, &ACC_U0::InitializeApplicationInfo, "InitializeApplicationInfo"},
        {101, &ACC_U0::GetBaasAccountManagerForApplication, "GetBaasAccountManagerForApplication"},
--- a/src/core/hle/service/acc/acc_u1.cpp
+++ b/src/core/hle/service/acc/acc_u1.cpp
@@ -17,7 +17,7 @@ ACC_U1::ACC_U1(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
        {5, &ACC_U1::GetProfile, "GetProfile"},
        {6, nullptr, "GetProfileDigest"},
        {50, &ACC_U1::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
-        {51, nullptr, "TrySelectUserWithoutInteraction"},
+        {51, &ACC_U1::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
        {60, nullptr, "ListOpenContextStoredUsers"},
        {100, nullptr, "GetUserRegistrationNotifier"},
        {101, nullptr, "GetUserStateChangeNotifier"},
--- a/src/core/hle/service/acc/profile_manager.cpp
+++ b/src/core/hle/service/acc/profile_manager.cpp
@@ -195,7 +195,7 @@ std::size_t ProfileManager::GetOpenUserCount() const {

 /// Checks if a user id exists in our profile manager
 bool ProfileManager::UserExists(UUID uuid) const {
-    return GetUserIndex(uuid) != std::nullopt;
+    return GetUserIndex(uuid).has_value();
 }

 bool ProfileManager::UserExistsIndex(std::size_t index) const {
--- a/src/core/hle/service/acc/profile_manager.h
+++ b/src/core/hle/service/acc/profile_manager.h
@@ -57,7 +57,8 @@ struct UUID {
 };
 static_assert(sizeof(UUID) == 16, "UUID is an invalid size!");

-using ProfileUsername = std::array<u8, 0x20>;
+constexpr std::size_t profile_username_size = 32;
+using ProfileUsername = std::array<u8, profile_username_size>;
 using ProfileData = std::array<u8, MAX_DATA>;
 using UserIDArray = std::array<UUID, MAX_USERS>;

--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -338,7 +338,54 @@ void ISelfController::GetIdleTimeDetectionExtension(Kernel::HLERequestContext& c
    LOG_WARNING(Service_AM, "(STUBBED) called");
 }

-ICommonStateGetter::ICommonStateGetter() : ServiceFramework("ICommonStateGetter") {
+AppletMessageQueue::AppletMessageQueue() {
+    auto& kernel = Core::System::GetInstance().Kernel();
+    on_new_message = Kernel::Event::Create(kernel, Kernel::ResetType::Sticky,
+                                           "AMMessageQueue:OnMessageRecieved");
+    on_operation_mode_changed = Kernel::Event::Create(kernel, Kernel::ResetType::OneShot,
+                                                      "AMMessageQueue:OperationModeChanged");
+}
+
+AppletMessageQueue::~AppletMessageQueue() = default;
+
+const Kernel::SharedPtr<Kernel::Event>& AppletMessageQueue::GetMesssageRecieveEvent() const {
+    return on_new_message;
+}
+
+const Kernel::SharedPtr<Kernel::Event>& AppletMessageQueue::GetOperationModeChangedEvent() const {
+    return on_operation_mode_changed;
+}
+
+void AppletMessageQueue::PushMessage(AppletMessage msg) {
+    messages.push(msg);
+    on_new_message->Signal();
+}
+
+AppletMessageQueue::AppletMessage AppletMessageQueue::PopMessage() {
+    // Oldest queued message, or NoMessage when nothing is pending.
+    AppletMessage next = AppletMessage::NoMessage;
+    if (!messages.empty()) {
+        next = messages.front();
+        messages.pop();
+    }
+    if (messages.empty()) { // queue drained (or was already empty): clear the event
+        on_new_message->Clear();
+    }
+    return next;
+}
+
+std::size_t AppletMessageQueue::GetMessageCount() const {
+    return messages.size();
+}
+
+void AppletMessageQueue::OperationModeChanged() {
+    PushMessage(AppletMessage::OperationModeChanged);
+    PushMessage(AppletMessage::PerformanceModeChanged);
+    on_operation_mode_changed->Signal();
+}
+
+ICommonStateGetter::ICommonStateGetter(std::shared_ptr<AppletMessageQueue> msg_queue)
+    : ServiceFramework("ICommonStateGetter"), msg_queue(std::move(msg_queue)) {
    // clang-format off
    static const FunctionInfo functions[] = {
        {0, &ICommonStateGetter::GetEventHandle, "GetEventHandle"},
@@ -388,21 +435,19 @@ void ICommonStateGetter::GetBootMode(Kernel::HLERequestContext& ctx) {
 }

 void ICommonStateGetter::GetEventHandle(Kernel::HLERequestContext& ctx) {
-    event->Signal();
-
    IPC::ResponseBuilder rb{ctx, 2, 1};
    rb.Push(RESULT_SUCCESS);
-    rb.PushCopyObjects(event);
+    rb.PushCopyObjects(msg_queue->GetMesssageRecieveEvent());

-    LOG_WARNING(Service_AM, "(STUBBED) called");
+    LOG_DEBUG(Service_AM, "called");
 }

 void ICommonStateGetter::ReceiveMessage(Kernel::HLERequestContext& ctx) {
    IPC::ResponseBuilder rb{ctx, 3};
    rb.Push(RESULT_SUCCESS);
-    rb.Push<u32>(15);
+    rb.PushEnum<AppletMessageQueue::AppletMessage>(msg_queue->PopMessage());

-    LOG_WARNING(Service_AM, "(STUBBED) called");
+    LOG_DEBUG(Service_AM, "called");
 }

 void ICommonStateGetter::GetCurrentFocusState(Kernel::HLERequestContext& ctx) {
@@ -414,13 +459,11 @@ void ICommonStateGetter::GetCurrentFocusState(Kernel::HLERequestContext& ctx) {
 }

 void ICommonStateGetter::GetDefaultDisplayResolutionChangeEvent(Kernel::HLERequestContext& ctx) {
-    event->Signal();
-
    IPC::ResponseBuilder rb{ctx, 2, 1};
    rb.Push(RESULT_SUCCESS);
-    rb.PushCopyObjects(event);
+    rb.PushCopyObjects(msg_queue->GetOperationModeChangedEvent());

-    LOG_WARNING(Service_AM, "(STUBBED) called");
+    LOG_DEBUG(Service_AM, "called");
 }

 void ICommonStateGetter::GetDefaultDisplayResolution(Kernel::HLERequestContext& ctx) {
@@ -444,7 +487,7 @@ void ICommonStateGetter::GetOperationMode(Kernel::HLERequestContext& ctx) {
    rb.Push(RESULT_SUCCESS);
    rb.Push(static_cast<u8>(use_docked_mode ? OperationMode::Docked : OperationMode::Handheld));

-    LOG_WARNING(Service_AM, "(STUBBED) called");
+    LOG_DEBUG(Service_AM, "called");
 }

 void ICommonStateGetter::GetPerformanceMode(Kernel::HLERequestContext& ctx) {
@@ -454,7 +497,7 @@ void ICommonStateGetter::GetPerformanceMode(Kernel::HLERequestContext& ctx) {
    rb.Push(static_cast<u32>(use_docked_mode ? APM::PerformanceMode::Docked
                                             : APM::PerformanceMode::Handheld));

-    LOG_WARNING(Service_AM, "(STUBBED) called");
+    LOG_DEBUG(Service_AM, "called");
 }

 class IStorageAccessor final : public ServiceFramework<IStorageAccessor> {
@@ -743,7 +786,7 @@ void IApplicationFunctions::PopLaunchParameter(Kernel::HLERequestContext& ctx) {

    Account::ProfileManager profile_manager{};
    const auto uuid = profile_manager.GetUser(Settings::values.current_user);
-    ASSERT(uuid != std::nullopt);
+    ASSERT(uuid);
    params.current_user = uuid->uuid;

    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
@@ -840,8 +883,12 @@ void IApplicationFunctions::GetPseudoDeviceId(Kernel::HLERequestContext& ctx) {

 void InstallInterfaces(SM::ServiceManager& service_manager,
                       std::shared_ptr<NVFlinger::NVFlinger> nvflinger) {
-    std::make_shared<AppletAE>(nvflinger)->InstallAsService(service_manager);
-    std::make_shared<AppletOE>(nvflinger)->InstallAsService(service_manager);
+    auto message_queue = std::make_shared<AppletMessageQueue>();
+    message_queue->PushMessage(
+        AppletMessageQueue::AppletMessage::FocusStateChanged); // Needed on game boot
+
+    std::make_shared<AppletAE>(nvflinger, message_queue)->InstallAsService(service_manager);
+    std::make_shared<AppletOE>(nvflinger, message_queue)->InstallAsService(service_manager);
    std::make_shared<IdleSys>()->InstallAsService(service_manager);
    std::make_shared<OMM>()->InstallAsService(service_manager);
    std::make_shared<SPSM>()->InstallAsService(service_manager);
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -5,6 +5,7 @@
 #pragma once

 #include <memory>
+#include <queue>
 #include "core/hle/service/service.h"

 namespace Kernel {
@@ -39,6 +40,31 @@ enum SystemLanguage {
    TraditionalChinese = 16,
 };

+class AppletMessageQueue { // FIFO of applet messages plus the kernel events tied to it.
+public:
+    enum class AppletMessage : u32 {
+        NoMessage = 0, // returned by PopMessage when nothing is queued
+        FocusStateChanged = 15, // pushed once by InstallInterfaces
+        OperationModeChanged = 30, // queued by OperationModeChanged()
+        PerformanceModeChanged = 31, // queued by OperationModeChanged()
+    };
+
+    AppletMessageQueue();
+    ~AppletMessageQueue();
+
+    const Kernel::SharedPtr<Kernel::Event>& GetMesssageRecieveEvent() const;
+    const Kernel::SharedPtr<Kernel::Event>& GetOperationModeChangedEvent() const;
+    void PushMessage(AppletMessage msg); // enqueues msg and signals the new-message event
+    AppletMessage PopMessage(); // dequeues the oldest message; clears the event when drained
+    std::size_t GetMessageCount() const; // number of queued messages
+    void OperationModeChanged(); // queues both mode messages and signals the mode event
+
+private:
+    std::queue<AppletMessage> messages; // pending messages, oldest first
+    Kernel::SharedPtr<Kernel::Event> on_new_message; // created with ResetType::Sticky
+    Kernel::SharedPtr<Kernel::Event> on_operation_mode_changed; // created with ResetType::OneShot
+};
+
 class IWindowController final : public ServiceFramework<IWindowController> {
 public:
    IWindowController();
@@ -102,7 +128,7 @@ private:

 class ICommonStateGetter final : public ServiceFramework<ICommonStateGetter> {
 public:
-    ICommonStateGetter();
+    explicit ICommonStateGetter(std::shared_ptr<AppletMessageQueue> msg_queue);
    ~ICommonStateGetter() override;

 private:
@@ -126,6 +152,7 @@ private:
    void GetDefaultDisplayResolution(Kernel::HLERequestContext& ctx);

    Kernel::SharedPtr<Kernel::Event> event;
+    std::shared_ptr<AppletMessageQueue> msg_queue;
 };

 class ILibraryAppletCreator final : public ServiceFramework<ILibraryAppletCreator> {
--- a/src/core/hle/service/am/applet_ae.cpp
+++ b/src/core/hle/service/am/applet_ae.cpp
@@ -12,8 +12,10 @@ namespace Service::AM {

 class ILibraryAppletProxy final : public ServiceFramework<ILibraryAppletProxy> {
 public:
-    explicit ILibraryAppletProxy(std::shared_ptr<NVFlinger::NVFlinger> nvflinger)
-        : ServiceFramework("ILibraryAppletProxy"), nvflinger(std::move(nvflinger)) {
+    explicit ILibraryAppletProxy(std::shared_ptr<NVFlinger::NVFlinger> nvflinger,
+                                 std::shared_ptr<AppletMessageQueue> msg_queue)
+        : ServiceFramework("ILibraryAppletProxy"), nvflinger(std::move(nvflinger)),
+          msg_queue(std::move(msg_queue)) {
        static const FunctionInfo functions[] = {
            {0, &ILibraryAppletProxy::GetCommonStateGetter, "GetCommonStateGetter"},
            {1, &ILibraryAppletProxy::GetSelfController, "GetSelfController"},
@@ -32,7 +34,7 @@ private:
    void GetCommonStateGetter(Kernel::HLERequestContext& ctx) {
        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
        rb.Push(RESULT_SUCCESS);
-        rb.PushIpcInterface<ICommonStateGetter>();
+        rb.PushIpcInterface<ICommonStateGetter>(msg_queue);
        LOG_DEBUG(Service_AM, "called");
    }

@@ -93,12 +95,15 @@ private:
    }

    std::shared_ptr<NVFlinger::NVFlinger> nvflinger;
+    std::shared_ptr<AppletMessageQueue> msg_queue;
 };

 class ISystemAppletProxy final : public ServiceFramework<ISystemAppletProxy> {
 public:
-    explicit ISystemAppletProxy(std::shared_ptr<NVFlinger::NVFlinger> nvflinger)
-        : ServiceFramework("ISystemAppletProxy"), nvflinger(std::move(nvflinger)) {
+    explicit ISystemAppletProxy(std::shared_ptr<NVFlinger::NVFlinger> nvflinger,
+                                std::shared_ptr<AppletMessageQueue> msg_queue)
+        : ServiceFramework("ISystemAppletProxy"), nvflinger(std::move(nvflinger)),
+          msg_queue(std::move(msg_queue)) {
        static const FunctionInfo functions[] = {
            {0, &ISystemAppletProxy::GetCommonStateGetter, "GetCommonStateGetter"},
            {1, &ISystemAppletProxy::GetSelfController, "GetSelfController"},
@@ -119,7 +124,7 @@ private:
    void GetCommonStateGetter(Kernel::HLERequestContext& ctx) {
        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
        rb.Push(RESULT_SUCCESS);
-        rb.PushIpcInterface<ICommonStateGetter>();
+        rb.PushIpcInterface<ICommonStateGetter>(msg_queue);
        LOG_DEBUG(Service_AM, "called");
    }

@@ -186,31 +191,34 @@ private:
        LOG_DEBUG(Service_AM, "called");
    }
    std::shared_ptr<NVFlinger::NVFlinger> nvflinger;
+    std::shared_ptr<AppletMessageQueue> msg_queue;
 };

 void AppletAE::OpenSystemAppletProxy(Kernel::HLERequestContext& ctx) {
    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
    rb.Push(RESULT_SUCCESS);
-    rb.PushIpcInterface<ISystemAppletProxy>(nvflinger);
+    rb.PushIpcInterface<ISystemAppletProxy>(nvflinger, msg_queue);
    LOG_DEBUG(Service_AM, "called");
 }

 void AppletAE::OpenLibraryAppletProxy(Kernel::HLERequestContext& ctx) {
    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
    rb.Push(RESULT_SUCCESS);
-    rb.PushIpcInterface<ILibraryAppletProxy>(nvflinger);
+    rb.PushIpcInterface<ILibraryAppletProxy>(nvflinger, msg_queue);
    LOG_DEBUG(Service_AM, "called");
 }

 void AppletAE::OpenLibraryAppletProxyOld(Kernel::HLERequestContext& ctx) {
    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
    rb.Push(RESULT_SUCCESS);
-    rb.PushIpcInterface<ILibraryAppletProxy>(nvflinger);
+    rb.PushIpcInterface<ILibraryAppletProxy>(nvflinger, msg_queue);
    LOG_DEBUG(Service_AM, "called");
 }

-AppletAE::AppletAE(std::shared_ptr<NVFlinger::NVFlinger> nvflinger)
-    : ServiceFramework("appletAE"), nvflinger(std::move(nvflinger)) {
+AppletAE::AppletAE(std::shared_ptr<NVFlinger::NVFlinger> nvflinger,
+                   std::shared_ptr<AppletMessageQueue> msg_queue)
+    : ServiceFramework("appletAE"), nvflinger(std::move(nvflinger)),
+      msg_queue(std::move(msg_queue)) {
    // clang-format off
    static const FunctionInfo functions[] = {
        {100, &AppletAE::OpenSystemAppletProxy, "OpenSystemAppletProxy"},
@@ -228,4 +236,8 @@ AppletAE::AppletAE(std::shared_ptr<NVFlinger::NVFlinger> nvflinger)

 AppletAE::~AppletAE() = default;

+const std::shared_ptr<AppletMessageQueue>& AppletAE::GetMessageQueue() const {
+    return msg_queue; // the queue passed to the constructor and forwarded to each opened proxy
+}
+
 } // namespace Service::AM
--- a/src/core/hle/service/am/applet_ae.h
+++ b/src/core/hle/service/am/applet_ae.h
@@ -17,15 +17,19 @@ namespace AM {

 class AppletAE final : public ServiceFramework<AppletAE> {
 public:
-    explicit AppletAE(std::shared_ptr<NVFlinger::NVFlinger> nvflinger);
+    explicit AppletAE(std::shared_ptr<NVFlinger::NVFlinger> nvflinger,
+                      std::shared_ptr<AppletMessageQueue> msg_queue);
    ~AppletAE() override;

+    const std::shared_ptr<AppletMessageQueue>& GetMessageQueue() const;
+
 private:
    void OpenSystemAppletProxy(Kernel::HLERequestContext& ctx);
    void OpenLibraryAppletProxy(Kernel::HLERequestContext& ctx);
    void OpenLibraryAppletProxyOld(Kernel::HLERequestContext& ctx);

    std::shared_ptr<NVFlinger::NVFlinger> nvflinger;
+    std::shared_ptr<AppletMessageQueue> msg_queue;
 };

 } // namespace AM
--- a/src/core/hle/service/am/applet_oe.cpp
+++ b/src/core/hle/service/am/applet_oe.cpp
@@ -12,8 +12,10 @@ namespace Service::AM {

 class IApplicationProxy final : public ServiceFramework<IApplicationProxy> {
 public:
-    explicit IApplicationProxy(std::shared_ptr<NVFlinger::NVFlinger> nvflinger)
-        : ServiceFramework("IApplicationProxy"), nvflinger(std::move(nvflinger)) {
+    explicit IApplicationProxy(std::shared_ptr<NVFlinger::NVFlinger> nvflinger,
+                               std::shared_ptr<AppletMessageQueue> msg_queue)
+        : ServiceFramework("IApplicationProxy"), nvflinger(std::move(nvflinger)),
+          msg_queue(std::move(msg_queue)) {
        // clang-format off
        static const FunctionInfo functions[] = {
            {0, &IApplicationProxy::GetCommonStateGetter, "GetCommonStateGetter"},
@@ -70,7 +72,7 @@ private:
    void GetCommonStateGetter(Kernel::HLERequestContext& ctx) {
        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
        rb.Push(RESULT_SUCCESS);
-        rb.PushIpcInterface<ICommonStateGetter>();
+        rb.PushIpcInterface<ICommonStateGetter>(msg_queue);
        LOG_DEBUG(Service_AM, "called");
    }

@@ -89,17 +91,20 @@ private:
    }

    std::shared_ptr<NVFlinger::NVFlinger> nvflinger;
+    std::shared_ptr<AppletMessageQueue> msg_queue;
 };

 void AppletOE::OpenApplicationProxy(Kernel::HLERequestContext& ctx) {
    IPC::ResponseBuilder rb{ctx, 2, 0, 1};
    rb.Push(RESULT_SUCCESS);
-    rb.PushIpcInterface<IApplicationProxy>(nvflinger);
+    rb.PushIpcInterface<IApplicationProxy>(nvflinger, msg_queue);
    LOG_DEBUG(Service_AM, "called");
 }

-AppletOE::AppletOE(std::shared_ptr<NVFlinger::NVFlinger> nvflinger)
-    : ServiceFramework("appletOE"), nvflinger(std::move(nvflinger)) {
+AppletOE::AppletOE(std::shared_ptr<NVFlinger::NVFlinger> nvflinger,
+                   std::shared_ptr<AppletMessageQueue> msg_queue)
+    : ServiceFramework("appletOE"), nvflinger(std::move(nvflinger)),
+      msg_queue(std::move(msg_queue)) {
    static const FunctionInfo functions[] = {
        {0, &AppletOE::OpenApplicationProxy, "OpenApplicationProxy"},
    };
@@ -108,4 +113,8 @@ AppletOE::AppletOE(std::shared_ptr<NVFlinger::NVFlinger> nvflinger)

 AppletOE::~AppletOE() = default;

+const std::shared_ptr<AppletMessageQueue>& AppletOE::GetMessageQueue() const {
+    return msg_queue; // the queue passed to the constructor and forwarded to each opened proxy
+}
+
 } // namespace Service::AM
--- a/src/core/hle/service/am/applet_oe.h
+++ b/src/core/hle/service/am/applet_oe.h
@@ -17,13 +17,17 @@ namespace AM {

 class AppletOE final : public ServiceFramework<AppletOE> {
 public:
-    explicit AppletOE(std::shared_ptr<NVFlinger::NVFlinger> nvflinger);
+    explicit AppletOE(std::shared_ptr<NVFlinger::NVFlinger> nvflinger,
+                      std::shared_ptr<AppletMessageQueue> msg_queue);
    ~AppletOE() override;

+    const std::shared_ptr<AppletMessageQueue>& GetMessageQueue() const;
+
 private:
    void OpenApplicationProxy(Kernel::HLERequestContext& ctx);

    std::shared_ptr<NVFlinger::NVFlinger> nvflinger;
+    std::shared_ptr<AppletMessageQueue> msg_queue;
 };

 } // namespace AM
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -161,7 +161,7 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
    ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");

    std::size_t worker_sz = WorkerBufferSize(channel_count);
-    ASSERT_MSG(buffer_sz < worker_sz, "Worker buffer too large");
+    ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large");
    std::unique_ptr<OpusDecoder, OpusDeleter> decoder{
        static_cast<OpusDecoder*>(operator new(worker_sz))};
    if (opus_decoder_init(decoder.get(), sample_rate, channel_count)) {
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -273,8 +273,8 @@ public:
            {0, &IFileSystem::CreateFile, "CreateFile"},
            {1, &IFileSystem::DeleteFile, "DeleteFile"},
            {2, &IFileSystem::CreateDirectory, "CreateDirectory"},
-            {3, nullptr, "DeleteDirectory"},
-            {4, nullptr, "DeleteDirectoryRecursively"},
+            {3, &IFileSystem::DeleteDirectory, "DeleteDirectory"},
+            {4, &IFileSystem::DeleteDirectoryRecursively, "DeleteDirectoryRecursively"},
            {5, &IFileSystem::RenameFile, "RenameFile"},
            {6, nullptr, "RenameDirectory"},
            {7, &IFileSystem::GetEntryType, "GetEntryType"},
@@ -329,6 +329,30 @@ public:
        rb.Push(backend.CreateDirectory(name));
    }

+    /// Handles DeleteDirectory: decodes the path from the request's read buffer and pushes
+    /// whatever backend.DeleteDirectory returns as the response.
+    void DeleteDirectory(Kernel::HLERequestContext& ctx) {
+        const IPC::RequestParser rp{ctx};
+        std::string path = Common::StringFromBuffer(ctx.ReadBuffer());
+
+        LOG_DEBUG(Service_FS, "called directory {}", path);
+
+        IPC::ResponseBuilder response{ctx, 2};
+        response.Push(backend.DeleteDirectory(path));
+    }
+
+    /// Handles DeleteDirectoryRecursively: decodes the path from the request's read buffer and
+    /// pushes whatever backend.DeleteDirectoryRecursively returns as the response.
+    void DeleteDirectoryRecursively(Kernel::HLERequestContext& ctx) {
+        const IPC::RequestParser rp{ctx};
+        std::string path = Common::StringFromBuffer(ctx.ReadBuffer());
+
+        LOG_DEBUG(Service_FS, "called directory {}", path);
+
+        IPC::ResponseBuilder response{ctx, 2};
+        response.Push(backend.DeleteDirectoryRecursively(path));
+    }
+
    void RenameFile(Kernel::HLERequestContext& ctx) {
        IPC::RequestParser rp{ctx};

--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -392,8 +392,10 @@ std::size_t Controller_NPad::GetSupportedNPadIdTypesSize() const {
 }

 void Controller_NPad::SetHoldType(NpadHoldType joy_hold_type) {
+    styleset_changed_event->Signal();
    hold_type = joy_hold_type;
 }
+
 Controller_NPad::NpadHoldType Controller_NPad::GetHoldType() const {
    return hold_type;
 }
@@ -427,6 +429,9 @@ void Controller_NPad::VibrateController(const std::vector<u32>& controller_ids,
 }

 Kernel::SharedPtr<Kernel::Event> Controller_NPad::GetStyleSetChangedEvent() const {
+    // TODO(ogniK): Figure out the best time to signal this event. It seems that it should be
+    // signaled at least once, and signaled again after a new controller is connected?
+    styleset_changed_event->Signal();
    return styleset_changed_event;
 }

--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -96,6 +96,8 @@ public:
        // TODO(shinyquagsire23): Other update callbacks? (accel, gyro?)

        CoreTiming::ScheduleEvent(pad_update_ticks, pad_update_event);
+
+        ReloadInputDevices();
    }

    void ActivateController(HidController controller) {
--- a/src/core/hle/service/nfp/nfp.cpp
+++ b/src/core/hle/service/nfp/nfp.cpp
@@ -212,7 +212,7 @@ private:
        IPC::ResponseBuilder rb{ctx, 2};
        auto amiibo = nfp_interface.GetAmiiboBuffer();
        TagInfo tag_info{};
-        std::memcpy(tag_info.uuid.data(), amiibo.uuid.data(), sizeof(tag_info.uuid.size()));
+        tag_info.uuid = amiibo.uuid;
        tag_info.uuid_length = static_cast<u8>(tag_info.uuid.size());

        tag_info.protocol = 1; // TODO(ogniK): Figure out actual values
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -31,7 +31,7 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
    buffer_wait_event->Signal();
 }

-boost::optional<u32> BufferQueue::DequeueBuffer(u32 width, u32 height) {
+std::optional<u32> BufferQueue::DequeueBuffer(u32 width, u32 height) {
    auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) {
        // Only consider free buffers. Buffers become free once again after they've been Acquired
        // and Released by the compositor, see the NVFlinger::Compose method.
@@ -44,7 +44,7 @@ boost::optional<u32> BufferQueue::DequeueBuffer(u32 width, u32 height) {
    });

    if (itr == queue.end()) {
-        return boost::none;
+        return {};
    }

    itr->status = Buffer::Status::Dequeued;
@@ -70,12 +70,12 @@ void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
    itr->crop_rect = crop_rect;
 }

-boost::optional<const BufferQueue::Buffer&> BufferQueue::AcquireBuffer() {
+std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() {
    auto itr = std::find_if(queue.begin(), queue.end(), [](const Buffer& buffer) {
        return buffer.status == Buffer::Status::Queued;
    });
    if (itr == queue.end())
-        return boost::none;
+        return {};
    itr->status = Buffer::Status::Acquired;
    return *itr;
 }
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -4,8 +4,9 @@

 #pragma once

+#include <optional>
 #include <vector>
-#include <boost/optional.hpp>
+
 #include "common/common_funcs.h"
 #include "common/math_util.h"
 #include "common/swap.h"
@@ -57,9 +58,9 @@ public:
        /// Rotate source image 90 degrees clockwise
        Rotate90 = 0x04,
        /// Rotate source image 180 degrees
-        Roate180 = 0x03,
+        Rotate180 = 0x03,
        /// Rotate source image 270 degrees clockwise
-        Roate270 = 0x07,
+        Rotate270 = 0x07,
    };

    struct Buffer {
@@ -73,11 +74,11 @@ public:
    };

    void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer);
-    boost::optional<u32> DequeueBuffer(u32 width, u32 height);
+    std::optional<u32> DequeueBuffer(u32 width, u32 height);
    const IGBPBuffer& RequestBuffer(u32 slot) const;
    void QueueBuffer(u32 slot, BufferTransformFlags transform,
                     const MathUtil::Rectangle<int>& crop_rect);
-    boost::optional<const Buffer&> AcquireBuffer();
+    std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
    void ReleaseBuffer(u32 slot);
    u32 Query(QueryType type);

--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -3,7 +3,7 @@
 // Refer to the license.txt file included.

 #include <algorithm>
-#include <boost/optional.hpp>
+#include <optional>

 #include "common/alignment.h"
 #include "common/assert.h"
@@ -134,7 +134,7 @@ void NVFlinger::Compose() {

        MicroProfileFlip();

-        if (buffer == boost::none) {
+        if (!buffer) {
            auto& system_instance = Core::System::GetInstance();

            // There was no queued buffer to draw, render previous frame
@@ -143,7 +143,7 @@ void NVFlinger::Compose() {
            continue;
        }

-        auto& igbp_buffer = buffer->igbp_buffer;
+        auto& igbp_buffer = buffer->get().igbp_buffer;

        // Now send the buffer to the GPU for drawing.
        // TODO(Subv): Support more than just disp0. The display device selection is probably based
@@ -152,10 +152,10 @@ void NVFlinger::Compose() {
        ASSERT(nvdisp);

        nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.format,
-                     igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, buffer->transform,
-                     buffer->crop_rect);
+                     igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
+                     buffer->get().transform, buffer->get().crop_rect);

-        buffer_queue->ReleaseBuffer(buffer->slot);
+        buffer_queue->ReleaseBuffer(buffer->get().slot);
    }
 }

--- a/src/core/hle/service/spl/module.cpp
+++ b/src/core/hle/service/spl/module.cpp
@@ -3,18 +3,23 @@
 // Refer to the license.txt file included.

 #include <algorithm>
+#include <chrono>
 #include <cstdlib>
+#include <ctime>
+#include <functional>
 #include <vector>
 #include "common/logging/log.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/service/spl/csrng.h"
 #include "core/hle/service/spl/module.h"
 #include "core/hle/service/spl/spl.h"
+#include "core/settings.h"

 namespace Service::SPL {

 Module::Interface::Interface(std::shared_ptr<Module> module, const char* name)
-    : ServiceFramework(name), module(std::move(module)) {}
+    : ServiceFramework(name), module(std::move(module)),
+      rng(Settings::values.rng_seed.value_or(std::time(nullptr))) {}

 Module::Interface::~Interface() = default;

@@ -24,7 +29,7 @@ void Module::Interface::GetRandomBytes(Kernel::HLERequestContext& ctx) {
    std::size_t size = ctx.GetWriteBufferSize();

    std::vector<u8> data(size);
-    std::generate(data.begin(), data.end(), std::rand);
+    std::generate(data.begin(), data.end(), rng);

    ctx.WriteBuffer(data);

--- a/src/core/hle/service/spl/module.h
+++ b/src/core/hle/service/spl/module.h
@@ -4,6 +4,7 @@

 #pragma once

+#include <random>
 #include "core/hle/service/service.h"

 namespace Service::SPL {
@@ -19,6 +20,9 @@ public:

    protected:
        std::shared_ptr<Module> module;
+
+    private:
+        std::mt19937 rng;
    };
 };

--- a/src/core/hle/service/time/interface.cpp
+++ b/src/core/hle/service/time/interface.cpp
@@ -21,7 +21,7 @@ Time::Time(std::shared_ptr<Module> time, const char* name)
        {102, nullptr, "GetStandardUserSystemClockInitialYear"},
        {200, nullptr, "IsStandardNetworkSystemClockAccuracySufficient"},
        {300, nullptr, "CalculateMonotonicSystemClockBaseTimePoint"},
-        {400, nullptr, "GetClockSnapshot"},
+        {400, &Time::GetClockSnapshot, "GetClockSnapshot"},
        {401, nullptr, "GetClockSnapshotFromSystemClockContext"},
        {500, nullptr, "CalculateStandardUserSystemClockDifferenceByUser"},
        {501, nullptr, "CalculateSpanBetween"},
--- a/src/core/hle/service/time/time.cpp
+++ b/src/core/hle/service/time/time.cpp
@@ -15,6 +15,44 @@

 namespace Service::Time {

+/// Splits posix_time via std::localtime; both outputs are reset to {} if that conversion fails.
+static void PosixToCalendar(u64 posix_time, CalendarTime& calendar_time,
+                            CalendarAdditionalInfo& additional_info,
+                            [[maybe_unused]] const TimeZoneRule& /*rule*/) {
+    const std::time_t raw_time(posix_time);
+    const std::tm* const broken_down = std::localtime(&raw_time);
+    if (!broken_down) {
+        calendar_time = {};
+        additional_info = {};
+        return;
+    }
+    calendar_time.year = broken_down->tm_year + 1900; // tm_year counts from 1900
+    calendar_time.month = broken_down->tm_mon + 1;    // tm_mon is zero-based
+    calendar_time.day = broken_down->tm_mday;
+    calendar_time.hour = broken_down->tm_hour;
+    calendar_time.minute = broken_down->tm_min;
+    calendar_time.second = broken_down->tm_sec;
+    additional_info.day_of_week = broken_down->tm_wday;
+    additional_info.day_of_year = broken_down->tm_yday;
+    additional_info.utc_offset = 0;
+    std::memcpy(additional_info.name.data(), "UTC", sizeof("UTC")); // sizeof includes the NUL
+}
+
+/// Converts calendar_time to a time value with std::mktime; the rule argument is ignored.
+static u64 CalendarToPosix(const CalendarTime& calendar_time,
+                           [[maybe_unused]] const TimeZoneRule& /*rule*/) {
+    std::tm broken_down{};
+    // std::tm counts years from 1900 and months from zero.
+    broken_down.tm_year = calendar_time.year - 1900;
+    broken_down.tm_mon = calendar_time.month - 1;
+    broken_down.tm_mday = calendar_time.day;
+    broken_down.tm_hour = calendar_time.hour;
+    broken_down.tm_min = calendar_time.minute;
+    broken_down.tm_sec = calendar_time.second;
+
+    return static_cast<u64>(std::mktime(&broken_down));
+}
+
 class ISystemClock final : public ServiceFramework<ISystemClock> {
 public:
    ISystemClock() : ServiceFramework("ISystemClock") {
@@ -80,8 +118,8 @@ public:
            {5, nullptr, "GetTimeZoneRuleVersion"},
            {100, &ITimeZoneService::ToCalendarTime, "ToCalendarTime"},
            {101, &ITimeZoneService::ToCalendarTimeWithMyRule, "ToCalendarTimeWithMyRule"},
-            {201, nullptr, "ToPosixTime"},
-            {202, nullptr, "ToPosixTimeWithMyRule"},
+            {201, &ITimeZoneService::ToPosixTime, "ToPosixTime"},
+            {202, &ITimeZoneService::ToPosixTimeWithMyRule, "ToPosixTimeWithMyRule"},
        };
        RegisterHandlers(functions);
    }
@@ -151,24 +189,29 @@ private:
        rb.PushRaw(additional_info);
    }

-    void PosixToCalendar(u64 posix_time, CalendarTime& calendar_time,
-                         CalendarAdditionalInfo& additional_info, const TimeZoneRule& /*rule*/) {
-        std::time_t t(posix_time);
-        std::tm* tm = std::localtime(&t);
-        if (!tm) {
-            return;
-        }
-        calendar_time.year = tm->tm_year + 1900;
-        calendar_time.month = tm->tm_mon + 1;
-        calendar_time.day = tm->tm_mday;
-        calendar_time.hour = tm->tm_hour;
-        calendar_time.minute = tm->tm_min;
-        calendar_time.second = tm->tm_sec;
+    void ToPosixTime(Kernel::HLERequestContext& ctx) {
+        // TODO(ogniK): Figure out how to handle multiple times
+        LOG_WARNING(Service_Time, "(STUBBED) called");
+        IPC::RequestParser rp{ctx};
+        auto calendar_time = rp.PopRaw<CalendarTime>();
+        auto posix_time = CalendarToPosix(calendar_time, {});

-        additional_info.day_of_week = tm->tm_wday;
-        additional_info.day_of_year = tm->tm_yday;
-        std::memcpy(additional_info.name.data(), "UTC", sizeof("UTC"));
-        additional_info.utc_offset = 0;
+        IPC::ResponseBuilder rb{ctx, 3};
+        rb.Push(RESULT_SUCCESS);
+        rb.PushRaw<u32>(1); // Amount of times we're returning
+        ctx.WriteBuffer(&posix_time, sizeof(u64));
+    }
+
+    void ToPosixTimeWithMyRule(Kernel::HLERequestContext& ctx) {
+        LOG_WARNING(Service_Time, "(STUBBED) called");
+        IPC::RequestParser rp{ctx};
+        auto calendar_time = rp.PopRaw<CalendarTime>();
+        auto posix_time = CalendarToPosix(calendar_time, {});
+
+        IPC::ResponseBuilder rb{ctx, 3};
+        rb.Push(RESULT_SUCCESS);
+        rb.PushRaw<u32>(1); // Amount of times we're returning
+        ctx.WriteBuffer(&posix_time, sizeof(u64));
    }
 };

@@ -207,6 +250,55 @@ void Module::Interface::GetStandardLocalSystemClock(Kernel::HLERequestContext& c
    LOG_DEBUG(Service_Time, "called");
 }

+/// Builds a ClockSnapshot from the host system clock and the CoreTiming tick count and writes
+/// it to the request's write buffer. The system and network fields are given identical values.
+void Module::Interface::GetClockSnapshot(Kernel::HLERequestContext& ctx) {
+    LOG_DEBUG(Service_Time, "called");
+
+    IPC::RequestParser rp{ctx};
+    const auto unknown_u8 = rp.PopRaw<u8>();
+
+    const s64 time_since_epoch{std::chrono::duration_cast<std::chrono::seconds>(
+                                   std::chrono::system_clock::now().time_since_epoch())
+                                   .count()};
+
+    // PosixToCalendar resets its outputs on failure instead of reporting it, so probe the
+    // conversion here first in order to be able to respond with an error code.
+    const std::time_t time(time_since_epoch);
+    if (std::localtime(&time) == nullptr) {
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(ResultCode(-1)); // TODO(ogniK): Find appropriate error code
+        return;
+    }
+
+    // PosixToCalendar is the single place that turns the timestamp into calendar fields, so
+    // the snapshot cannot drift from what the time zone service returns for the same input.
+    CalendarTime calendar_time{};
+    CalendarAdditionalInfo additional_info{};
+    PosixToCalendar(time_since_epoch, calendar_time, additional_info, {});
+
+    const SteadyClockTimePoint steady_clock_time_point{
+        CoreTiming::cyclesToMs(CoreTiming::GetTicks()) / 1000};
+    const LocationName location_name{"UTC"};
+
+    ClockSnapshot clock_snapshot{};
+    clock_snapshot.system_posix_time = time_since_epoch;
+    clock_snapshot.network_posix_time = time_since_epoch;
+    clock_snapshot.system_calendar_time = calendar_time;
+    clock_snapshot.network_calendar_time = calendar_time;
+    clock_snapshot.system_calendar_info = additional_info;
+    clock_snapshot.network_calendar_info = additional_info;
+    clock_snapshot.steady_clock_timepoint = steady_clock_time_point;
+    clock_snapshot.location_name = location_name;
+    clock_snapshot.clock_auto_adjustment_enabled = 1;
+    clock_snapshot.ipc_u8 = unknown_u8; // echo the request's u8 back unchanged
+
+    // Only the result code is pushed into the response; the snapshot goes via the buffer.
+    IPC::ResponseBuilder rb{ctx, 2};
+    rb.Push(RESULT_SUCCESS);
+    ctx.WriteBuffer(&clock_snapshot, sizeof(ClockSnapshot));
+}
+
 Module::Interface::Interface(std::shared_ptr<Module> time, const char* name)
    : ServiceFramework(name), time(std::move(time)) {}

--- a/src/core/hle/service/time/time.h
+++ b/src/core/hle/service/time/time.h
@@ -5,6 +5,7 @@
 #pragma once

 #include <array>
+#include "common/common_funcs.h"
 #include "core/hle/service/service.h"

 namespace Service::Time {
@@ -53,6 +54,23 @@ struct SystemClockContext {
 static_assert(sizeof(SystemClockContext) == 0x20,
              "SystemClockContext structure has incorrect size");

+struct ClockSnapshot { // Buffer layout that GetClockSnapshot fills in and writes back.
+    SystemClockContext user_clock_context; // not populated by GetClockSnapshot
+    SystemClockContext network_clock_context; // not populated by GetClockSnapshot
+    s64_le system_posix_time; // seconds since the epoch, taken from the host system clock
+    s64_le network_posix_time; // GetClockSnapshot stores the same value as system_posix_time
+    CalendarTime system_calendar_time;
+    CalendarTime network_calendar_time; // GetClockSnapshot stores the same value as the system one
+    CalendarAdditionalInfo system_calendar_info;
+    CalendarAdditionalInfo network_calendar_info; // same value as system_calendar_info
+    SteadyClockTimePoint steady_clock_timepoint;
+    LocationName location_name; // GetClockSnapshot always reports "UTC"
+    u8 clock_auto_adjustment_enabled; // GetClockSnapshot always sets this to 1
+    u8 ipc_u8; // echoes the u8 popped from the request
+    INSERT_PADDING_BYTES(2);
+};
+static_assert(sizeof(ClockSnapshot) == 0xd0, "ClockSnapshot is an invalid size");
+
 class Module final {
 public:
    class Interface : public ServiceFramework<Interface> {
@@ -65,6 +83,7 @@ public:
        void GetStandardSteadyClock(Kernel::HLERequestContext& ctx);
        void GetTimeZoneService(Kernel::HLERequestContext& ctx);
        void GetStandardLocalSystemClock(Kernel::HLERequestContext& ctx);
+        void GetClockSnapshot(Kernel::HLERequestContext& ctx);

    protected:
        std::shared_ptr<Module> time;
--- a/src/core/hle/service/usb/usb.cpp
+++ b/src/core/hle/service/usb/usb.cpp
@@ -132,11 +132,11 @@ public:
        // clang-format off
        static const FunctionInfo functions[] = {
            {0, nullptr, "BindNoticeEvent"},
-            {1, nullptr, "Unknown1"},
+            {1, nullptr, "UnbindNoticeEvent"},
            {2, nullptr, "GetStatus"},
            {3, nullptr, "GetNotice"},
-            {4, nullptr, "Unknown2"},
-            {5, nullptr, "Unknown3"},
+            {4, nullptr, "EnablePowerRequestNotice"},
+            {5, nullptr, "DisablePowerRequestNotice"},
            {6, nullptr, "ReplyPowerRequest"},
        };
        // clang-format on
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -6,9 +6,10 @@
 #include <array>
 #include <cstring>
 #include <memory>
+#include <optional>
 #include <type_traits>
 #include <utility>
-#include <boost/optional.hpp>
+
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/common_funcs.h"
@@ -506,9 +507,9 @@ private:
            IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
            const u32 width{request.data.width};
            const u32 height{request.data.height};
-            boost::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
+            std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);

-            if (slot != boost::none) {
+            if (slot) {
                // Buffer is available
                IGBPDequeueBufferResponseParcel response{*slot};
                ctx.WriteBuffer(response.Serialize());
@@ -520,7 +521,7 @@ private:
                        Kernel::ThreadWakeupReason reason) {
                        // Repeat TransactParcel DequeueBuffer when a buffer is available
                        auto buffer_queue = nv_flinger->GetBufferQueue(id);
-                        boost::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
+                        std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height);
                        IGBPDequeueBufferResponseParcel response{*slot};
                        ctx.WriteBuffer(response.Serialize());
                        IPC::ResponseBuilder rb{ctx, 2};
--- a/src/core/loader/loader.h
+++ b/src/core/loader/loader.h
@@ -6,10 +6,11 @@

 #include <iosfwd>
 #include <memory>
+#include <optional>
 #include <string>
 #include <utility>
 #include <vector>
-#include <boost/optional.hpp>
+
 #include "common/common_types.h"
 #include "core/file_sys/vfs.h"

@@ -145,7 +146,7 @@ public:
     * information.
     * @returns A pair with the optional system mode, and and the status.
     */
-    virtual std::pair<boost::optional<u32>, ResultStatus> LoadKernelSystemMode() {
+    virtual std::pair<std::optional<u32>, ResultStatus> LoadKernelSystemMode() {
        // 96MB allocated to the application.
        return std::make_pair(2, ResultStatus::Success);
    }
--- a/src/core/loader/nsp.cpp
+++ b/src/core/loader/nsp.cpp
@@ -36,6 +36,16 @@ AppLoader_NSP::AppLoader_NSP(FileSys::VirtualFile file)

    std::tie(nacp_file, icon_file) =
        FileSys::PatchManager(nsp->GetProgramTitleID()).ParseControlNCA(*control_nca);
+
+    if (nsp->IsExtractedType()) {
+        secondary_loader = std::make_unique<AppLoader_DeconstructedRomDirectory>(nsp->GetExeFS());
+    } else {
+        if (title_id == 0)
+            return;
+
+        secondary_loader = std::make_unique<AppLoader_NCA>(
+            nsp->GetNCAFile(title_id, FileSys::ContentRecordType::Program));
+    }
 }

 AppLoader_NSP::~AppLoader_NSP() = default;
@@ -67,26 +77,19 @@ ResultStatus AppLoader_NSP::Load(Kernel::Process& process) {
        return ResultStatus::ErrorAlreadyLoaded;
    }

-    if (nsp->IsExtractedType()) {
-        secondary_loader = std::make_unique<AppLoader_DeconstructedRomDirectory>(nsp->GetExeFS());
-    } else {
-        if (title_id == 0)
-            return ResultStatus::ErrorNSPMissingProgramNCA;
+    if (title_id == 0)
+        return ResultStatus::ErrorNSPMissingProgramNCA;

-        secondary_loader = std::make_unique<AppLoader_NCA>(
-            nsp->GetNCAFile(title_id, FileSys::ContentRecordType::Program));
+    if (nsp->GetStatus() != ResultStatus::Success)
+        return nsp->GetStatus();

-        if (nsp->GetStatus() != ResultStatus::Success)
-            return nsp->GetStatus();
+    if (nsp->GetProgramStatus(title_id) != ResultStatus::Success)
+        return nsp->GetProgramStatus(title_id);

-        if (nsp->GetProgramStatus(title_id) != ResultStatus::Success)
-            return nsp->GetProgramStatus(title_id);
-
-        if (nsp->GetNCA(title_id, FileSys::ContentRecordType::Program) == nullptr) {
-            if (!Core::Crypto::KeyManager::KeyFileExists(false))
-                return ResultStatus::ErrorMissingProductionKeyFile;
-            return ResultStatus::ErrorNSPMissingProgramNCA;
-        }
+    if (nsp->GetNCA(title_id, FileSys::ContentRecordType::Program) == nullptr) {
+        if (!Core::Crypto::KeyManager::KeyFileExists(false))
+            return ResultStatus::ErrorMissingProductionKeyFile;
+        return ResultStatus::ErrorNSPMissingProgramNCA;
    }

    const auto result = secondary_loader->Load(process);
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -4,9 +4,9 @@

 #include <algorithm>
 #include <cstring>
+#include <optional>
 #include <utility>

-#include <boost/optional.hpp>
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
--- a/src/core/memory_hook.h
+++ b/src/core/memory_hook.h
@@ -5,7 +5,8 @@
 #pragma once

 #include <memory>
-#include <boost/optional.hpp>
+#include <optional>
+
 #include "common/common_types.h"

 namespace Memory {
@@ -18,19 +19,19 @@ namespace Memory {
 *
 * A hook may be mapped to multiple regions of memory.
 *
- * If a boost::none or false is returned from a function, the read/write request is passed through
+ * If a std::nullopt or false is returned from a function, the read/write request is passed through
 * to the underlying memory region.
 */
 class MemoryHook {
 public:
    virtual ~MemoryHook();

-    virtual boost::optional<bool> IsValidAddress(VAddr addr) = 0;
+    virtual std::optional<bool> IsValidAddress(VAddr addr) = 0;

-    virtual boost::optional<u8> Read8(VAddr addr) = 0;
-    virtual boost::optional<u16> Read16(VAddr addr) = 0;
-    virtual boost::optional<u32> Read32(VAddr addr) = 0;
-    virtual boost::optional<u64> Read64(VAddr addr) = 0;
+    virtual std::optional<u8> Read8(VAddr addr) = 0;
+    virtual std::optional<u16> Read16(VAddr addr) = 0;
+    virtual std::optional<u32> Read32(VAddr addr) = 0;
+    virtual std::optional<u64> Read64(VAddr addr) = 0;

    virtual bool ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size) = 0;

--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -6,6 +6,7 @@

 #include <array>
 #include <atomic>
+#include <optional>
 #include <string>
 #include "common/common_types.h"

@@ -114,8 +115,9 @@ struct Values {
    // System
    bool use_docked_mode;
    bool enable_nfc;
-    int current_user;
-    int language_index;
+    std::optional<u64> rng_seed;
+    s32 current_user;
+    s32 language_index;

    // Controls
    std::array<std::string, NativeButton::NumButtons> buttons;
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -184,4 +184,13 @@ TelemetrySession::~TelemetrySession() {
    backend = nullptr;
 }

+bool TelemetrySession::SubmitTestcase() {
+#ifdef ENABLE_WEB_SERVICE
+    field_collection.Accept(*backend);
+    return backend->SubmitTestcase();
+#else
+    return false;
+#endif
+}
+
 } // namespace Core
--- a/src/core/telemetry_session.h
+++ b/src/core/telemetry_session.h
@@ -31,6 +31,12 @@ public:
        field_collection.AddField(type, name, std::move(value));
    }

+    /**
+     * Submits a Testcase.
+     * @returns A bool indicating whether the submission succeeded
+     */
+    bool SubmitTestcase();
+
 private:
    Telemetry::FieldCollection field_collection; ///< Tracks all added fields for the session
    std::unique_ptr<Telemetry::VisitorInterface> backend; ///< Backend interface that logs fields
--- a/src/tests/core/arm/arm_test_common.cpp
+++ b/src/tests/core/arm/arm_test_common.cpp
@@ -64,11 +64,11 @@ void TestEnvironment::ClearWriteRecords() {

 TestEnvironment::TestMemory::~TestMemory() {}

-boost::optional<bool> TestEnvironment::TestMemory::IsValidAddress(VAddr addr) {
+std::optional<bool> TestEnvironment::TestMemory::IsValidAddress(VAddr addr) {
    return true;
 }

-boost::optional<u8> TestEnvironment::TestMemory::Read8(VAddr addr) {
+std::optional<u8> TestEnvironment::TestMemory::Read8(VAddr addr) {
    const auto iter = data.find(addr);

    if (iter == data.end()) {
@@ -79,15 +79,15 @@ boost::optional<u8> TestEnvironment::TestMemory::Read8(VAddr addr) {
    return iter->second;
 }

-boost::optional<u16> TestEnvironment::TestMemory::Read16(VAddr addr) {
+std::optional<u16> TestEnvironment::TestMemory::Read16(VAddr addr) {
    return *Read8(addr) | static_cast<u16>(*Read8(addr + 1)) << 8;
 }

-boost::optional<u32> TestEnvironment::TestMemory::Read32(VAddr addr) {
+std::optional<u32> TestEnvironment::TestMemory::Read32(VAddr addr) {
    return *Read16(addr) | static_cast<u32>(*Read16(addr + 2)) << 16;
 }

-boost::optional<u64> TestEnvironment::TestMemory::Read64(VAddr addr) {
+std::optional<u64> TestEnvironment::TestMemory::Read64(VAddr addr) {
    return *Read32(addr) | static_cast<u64>(*Read32(addr + 4)) << 32;
 }

--- a/src/tests/core/arm/arm_test_common.h
+++ b/src/tests/core/arm/arm_test_common.h
@@ -64,12 +64,12 @@ private:

        ~TestMemory() override;

-        boost::optional<bool> IsValidAddress(VAddr addr) override;
+        std::optional<bool> IsValidAddress(VAddr addr) override;

-        boost::optional<u8> Read8(VAddr addr) override;
-        boost::optional<u16> Read16(VAddr addr) override;
-        boost::optional<u32> Read32(VAddr addr) override;
-        boost::optional<u64> Read64(VAddr addr) override;
+        std::optional<u8> Read8(VAddr addr) override;
+        std::optional<u16> Read16(VAddr addr) override;
+        std::optional<u32> Read32(VAddr addr) override;
+        std::optional<u64> Read64(VAddr addr) override;

        bool ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size) override;

--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -21,6 +21,7 @@ add_library(video_core STATIC
    macro_interpreter.h
    memory_manager.cpp
    memory_manager.h
+    rasterizer_cache.cpp
    rasterizer_cache.h
    rasterizer_interface.h
    renderer_base.cpp
@@ -33,6 +34,7 @@ add_library(video_core STATIC
    renderer_opengl/gl_rasterizer.h
    renderer_opengl/gl_rasterizer_cache.cpp
    renderer_opengl/gl_rasterizer_cache.h
+    renderer_opengl/gl_resource_manager.cpp
    renderer_opengl/gl_resource_manager.h
    renderer_opengl/gl_shader_cache.cpp
    renderer_opengl/gl_shader_cache.h
@@ -51,6 +53,10 @@ add_library(video_core STATIC
    renderer_opengl/maxwell_to_gl.h
    renderer_opengl/renderer_opengl.cpp
    renderer_opengl/renderer_opengl.h
+    renderer_opengl/utils.cpp
+    renderer_opengl/utils.h
+    surface.cpp
+    surface.h
    textures/astc.cpp
    textures/astc.h
    textures/decoders.cpp
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -81,7 +81,7 @@ void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
    for (auto entry : commands) {
        Tegra::GPUVAddr address = entry.Address();
        u32 size = entry.sz;
-        const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
+        const std::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
        VAddr current_addr = *head_address;
        while (current_addr < *head_address + size * sizeof(CommandHeader)) {
            const CommandHeader header = {Memory::Read32(current_addr)};
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -37,21 +37,52 @@ void Maxwell3D::InitializeRegisterDefaults() {
        regs.viewport[viewport].depth_range_near = 0.0f;
        regs.viewport[viewport].depth_range_far = 1.0f;
    }
+    // Doom and Bomberman seem to use the uninitialized registers and just enable blend
+    // so initialize blend registers with sane values
+    regs.blend.equation_rgb = Regs::Blend::Equation::Add;
+    regs.blend.factor_source_rgb = Regs::Blend::Factor::One;
+    regs.blend.factor_dest_rgb = Regs::Blend::Factor::Zero;
+    regs.blend.equation_a = Regs::Blend::Equation::Add;
+    regs.blend.factor_source_a = Regs::Blend::Factor::One;
+    regs.blend.factor_dest_a = Regs::Blend::Factor::Zero;
+    for (std::size_t blend_index = 0; blend_index < Regs::NumRenderTargets; blend_index++) {
+        regs.independent_blend[blend_index].equation_rgb = Regs::Blend::Equation::Add;
+        regs.independent_blend[blend_index].factor_source_rgb = Regs::Blend::Factor::One;
+        regs.independent_blend[blend_index].factor_dest_rgb = Regs::Blend::Factor::Zero;
+        regs.independent_blend[blend_index].equation_a = Regs::Blend::Equation::Add;
+        regs.independent_blend[blend_index].factor_source_a = Regs::Blend::Factor::One;
+        regs.independent_blend[blend_index].factor_dest_a = Regs::Blend::Factor::Zero;
+    }
+    regs.stencil_front_op_fail = Regs::StencilOp::Keep;
+    regs.stencil_front_op_zfail = Regs::StencilOp::Keep;
+    regs.stencil_front_op_zpass = Regs::StencilOp::Keep;
+    regs.stencil_front_func_func = Regs::ComparisonOp::Always;
+    regs.stencil_front_func_mask = 0xFFFFFFFF;
+    regs.stencil_front_mask = 0xFFFFFFFF;
+    regs.stencil_two_side_enable = 1;
+    regs.stencil_back_op_fail = Regs::StencilOp::Keep;
+    regs.stencil_back_op_zfail = Regs::StencilOp::Keep;
+    regs.stencil_back_op_zpass = Regs::StencilOp::Keep;
+    regs.stencil_back_func_func = Regs::ComparisonOp::Always;
+    regs.stencil_back_func_mask = 0xFFFFFFFF;
+    regs.stencil_back_mask = 0xFFFFFFFF;
 }

 void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
    // Reset the current macro.
    executing_macro = 0;

-    // The requested macro must have been uploaded already.
-    auto macro_code = uploaded_macros.find(method);
-    if (macro_code == uploaded_macros.end()) {
-        LOG_ERROR(HW_GPU, "Macro {:04X} was not uploaded", method);
+    // Lookup the macro offset
+    const u32 entry{(method - MacroRegistersStart) >> 1};
+    const auto& search{macro_offsets.find(entry)};
+    if (search == macro_offsets.end()) {
+        LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method);
+        UNREACHABLE();
        return;
    }

    // Execute the current macro.
-    macro_interpreter.Execute(macro_code->second, std::move(parameters));
+    macro_interpreter.Execute(search->second, std::move(parameters));
 }

 void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
@@ -90,13 +121,25 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
        debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
    }

+    u32 old = regs.reg_array[method];
    regs.reg_array[method] = value;

+    if (value != old) {
+        if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
+            method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
+            dirty_flags.vertex_attrib_format = true;
+        }
+    }
+
    switch (method) {
    case MAXWELL3D_REG_INDEX(macros.data): {
        ProcessMacroUpload(value);
        break;
    }
+    case MAXWELL3D_REG_INDEX(macros.bind): {
+        ProcessMacroBind(value);
+        break;
+    }
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
    case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
@@ -158,16 +201,20 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
 }

 void Maxwell3D::ProcessMacroUpload(u32 data) {
-    // Store the uploaded macro code to interpret them when they're called.
-    auto& macro = uploaded_macros[regs.macros.entry * 2 + MacroRegistersStart];
-    macro.push_back(data);
+    ASSERT_MSG(regs.macros.upload_address < macro_memory.size(),
+               "upload_address exceeded macro_memory size!");
+    macro_memory[regs.macros.upload_address++] = data;
+}
+
+void Maxwell3D::ProcessMacroBind(u32 data) {
+    macro_offsets[regs.macros.entry] = data;
 }

 void Maxwell3D::ProcessQueryGet() {
    GPUVAddr sequence_address = regs.query.QueryAddress();
    // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
    // VAddr before writing.
-    boost::optional<VAddr> address = memory_manager.GpuToCpuAddress(sequence_address);
+    std::optional<VAddr> address = memory_manager.GpuToCpuAddress(sequence_address);

    // TODO(Subv): Support the other query units.
    ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -285,7 +332,7 @@ void Maxwell3D::ProcessCBData(u32 value) {
    // Don't allow writing past the end of the buffer.
    ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);

-    boost::optional<VAddr> address =
+    std::optional<VAddr> address =
        memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);

    Memory::Write32(*address, value);
@@ -298,7 +345,7 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
    GPUVAddr tic_base_address = regs.tic.TICAddress();

    GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
-    boost::optional<VAddr> tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
+    std::optional<VAddr> tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);

    Texture::TICEntry tic_entry;
    Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
@@ -322,7 +369,7 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
    GPUVAddr tsc_base_address = regs.tsc.TSCAddress();

    GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
-    boost::optional<VAddr> tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
+    std::optional<VAddr> tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);

    Texture::TSCEntry tsc_entry;
    Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry));
@@ -386,7 +433,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,

    ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);

-    boost::optional<VAddr> tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
+    std::optional<VAddr> tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
    Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};

    Texture::FullTextureInfo tex_info{};
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -345,6 +345,14 @@ public:
            Invert = 6,
            IncrWrap = 7,
            DecrWrap = 8,
+            KeepOGL = 0x1E00,
+            ZeroOGL = 0,
+            ReplaceOGL = 0x1E01,
+            IncrOGL = 0x1E02,
+            DecrOGL = 0x1E03,
+            InvertOGL = 0x150A,
+            IncrWrapOGL = 0x8507,
+            DecrWrapOGL = 0x8508,
        };

        enum class MemoryLayout : u32 {
@@ -462,6 +470,16 @@ public:
            }
        };

+        struct ColorMask {
+            union {
+                u32 raw;
+                BitField<0, 4, u32> R;
+                BitField<4, 4, u32> G;
+                BitField<8, 4, u32> B;
+                BitField<12, 4, u32> A;
+            };
+        };
+
        bool IsShaderConfigEnabled(std::size_t index) const {
            // The VertexB is always enabled.
            if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) {
@@ -475,12 +493,13 @@ public:
                INSERT_PADDING_WORDS(0x45);

                struct {
-                    INSERT_PADDING_WORDS(1);
+                    u32 upload_address;
                    u32 data;
                    u32 entry;
+                    u32 bind;
                } macros;

-                INSERT_PADDING_WORDS(0x189);
+                INSERT_PADDING_WORDS(0x188);

                u32 tfb_enabled;

@@ -570,7 +589,11 @@ public:
                u32 stencil_back_mask;
                u32 stencil_back_func_mask;

-                INSERT_PADDING_WORDS(0x13);
+                INSERT_PADDING_WORDS(0xC);
+
+                u32 color_mask_common;
+
+                INSERT_PADDING_WORDS(0x6);

                u32 rt_separate_frag_data;

@@ -645,8 +668,14 @@ public:
                ComparisonOp depth_test_func;
                float alpha_test_ref;
                ComparisonOp alpha_test_func;
-
-                INSERT_PADDING_WORDS(0x9);
+                u32 draw_tfb_stride;
+                struct {
+                    float r;
+                    float g;
+                    float b;
+                    float a;
+                } blend_color;
+                INSERT_PADDING_WORDS(0x4);

                struct {
                    u32 separate_alpha;
@@ -723,7 +752,11 @@ public:
                StencilOp stencil_back_op_zpass;
                ComparisonOp stencil_back_func_func;

-                INSERT_PADDING_WORDS(0x17);
+                INSERT_PADDING_WORDS(0x4);
+
+                u32 framebuffer_srgb;
+
+                INSERT_PADDING_WORDS(0x12);

                union {
                    BitField<2, 1, u32> coord_origin;
@@ -751,7 +784,14 @@ public:
                    };
                } draw;

-                INSERT_PADDING_WORDS(0x6B);
+                INSERT_PADDING_WORDS(0xA);
+
+                struct {
+                    u32 enabled;
+                    u32 index;
+                } primitive_restart;
+
+                INSERT_PADDING_WORDS(0x5F);

                struct {
                    u32 start_addr_high;
@@ -829,8 +869,9 @@ public:
                    BitField<6, 4, u32> RT;
                    BitField<10, 11, u32> layer;
                } clear_buffers;
-
-                INSERT_PADDING_WORDS(0x4B);
+                INSERT_PADDING_WORDS(0xB);
+                std::array<ColorMask, NumRenderTargets> color_mask;
+                INSERT_PADDING_WORDS(0x38);

                struct {
                    u32 query_address_high;
@@ -971,6 +1012,12 @@ public:
    State state{};
    MemoryManager& memory_manager;

+    struct DirtyFlags {
+        bool vertex_attrib_format = true;
+    };
+
+    DirtyFlags dirty_flags;
+
    /// Reads a register value located at the input method address
    u32 GetRegisterValue(u32 method) const;

@@ -983,12 +1030,25 @@ public:
    /// Returns the texture information for a specific texture in a specific shader stage.
    Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;

+    /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
+    /// we've seen used.
+    using MacroMemory = std::array<u32, 0x40000>;
+
+    /// Gets a reference to macro memory.
+    const MacroMemory& GetMacroMemory() const {
+        return macro_memory;
+    }
+
 private:
    void InitializeRegisterDefaults();

    VideoCore::RasterizerInterface& rasterizer;

-    std::unordered_map<u32, std::vector<u32>> uploaded_macros;
+    /// Start offsets of each macro in macro_memory
+    std::unordered_map<u32, u32> macro_offsets;
+
+    /// Memory for macro code; value-initialized so words never uploaded read as zero
+    MacroMemory macro_memory{};

    /// Macro method that is currently being executed / being fed parameters.
    u32 executing_macro = 0;
@@ -1011,9 +1071,12 @@ private:
     */
    void CallMacroMethod(u32 method, std::vector<u32> parameters);

-    /// Handles writes to the macro uploading registers.
+    /// Handles writes to the macro uploading register.
    void ProcessMacroUpload(u32 data);

+    /// Handles writes to the macro bind register.
+    void ProcessMacroBind(u32 data);
+
    /// Handles a write to the CLEAR_BUFFERS register.
    void ProcessClearBuffers();

@@ -1047,6 +1110,7 @@ ASSERT_REG_POSITION(scissor_test, 0x380);
 ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
 ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
 ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
+ASSERT_REG_POSITION(color_mask_common, 0x3E4);
 ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
 ASSERT_REG_POSITION(zeta, 0x3F8);
 ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
@@ -1059,6 +1123,10 @@ ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
 ASSERT_REG_POSITION(alpha_test_enabled, 0x4BB);
 ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2);
 ASSERT_REG_POSITION(depth_test_func, 0x4C3);
+ASSERT_REG_POSITION(alpha_test_ref, 0x4C4);
+ASSERT_REG_POSITION(alpha_test_func, 0x4C5);
+ASSERT_REG_POSITION(draw_tfb_stride, 0x4C6);
+ASSERT_REG_POSITION(blend_color, 0x4C7);
 ASSERT_REG_POSITION(blend, 0x4CF);
 ASSERT_REG_POSITION(stencil_enable, 0x4E0);
 ASSERT_REG_POSITION(stencil_front_op_fail, 0x4E1);
@@ -1079,14 +1147,17 @@ ASSERT_REG_POSITION(stencil_back_op_fail, 0x566);
 ASSERT_REG_POSITION(stencil_back_op_zfail, 0x567);
 ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568);
 ASSERT_REG_POSITION(stencil_back_func_func, 0x569);
+ASSERT_REG_POSITION(framebuffer_srgb, 0x56E);
 ASSERT_REG_POSITION(point_coord_replace, 0x581);
 ASSERT_REG_POSITION(code_address, 0x582);
 ASSERT_REG_POSITION(draw, 0x585);
+ASSERT_REG_POSITION(primitive_restart, 0x591);
 ASSERT_REG_POSITION(index_array, 0x5F2);
 ASSERT_REG_POSITION(instanced_arrays, 0x620);
 ASSERT_REG_POSITION(cull, 0x646);
 ASSERT_REG_POSITION(logic_op, 0x671);
 ASSERT_REG_POSITION(clear_buffers, 0x674);
+ASSERT_REG_POSITION(color_mask, 0x680);
 ASSERT_REG_POSITION(query, 0x6C0);
 ASSERT_REG_POSITION(vertex_array[0], 0x700);
 ASSERT_REG_POSITION(independent_blend, 0x780);
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -5,12 +5,11 @@
 #pragma once

 #include <bitset>
+#include <optional>
 #include <string>
 #include <tuple>
 #include <vector>

-#include <boost/optional.hpp>
-
 #include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/common_types.h"
@@ -578,6 +577,10 @@ union Instruction {
        BitField<55, 1, u64> saturate;
    } fmul32;

+    union {
+        BitField<52, 1, u64> generates_cc;
+    } op_32;
+
    union {
        BitField<48, 1, u64> is_signed;
    } shift;
@@ -1232,6 +1235,7 @@ union Instruction {
    BitField<60, 1, u64> is_b_gpr;
    BitField<59, 1, u64> is_c_gpr;
    BitField<20, 24, s64> smem_imm;
+    BitField<0, 5, ControlCode> flow_control_code;

    Attribute attribute;
    Sampler sampler;
@@ -1456,7 +1460,7 @@ public:
        Type type;
    };

-    static boost::optional<const Matcher&> Decode(Instruction instr) {
+    static std::optional<std::reference_wrapper<const Matcher>> Decode(Instruction instr) {
        static const auto table{GetDecodeTable()};

        const auto matches_instruction = [instr](const auto& matcher) {
@@ -1464,7 +1468,8 @@ public:
        };

        auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
-        return iter != table.end() ? boost::optional<const Matcher&>(*iter) : boost::none;
+        return iter != table.end() ? std::optional<std::reference_wrapper<const Matcher>>(*iter)
+                                   : std::nullopt;
    }

 private:
@@ -1658,4 +1663,4 @@ private:
    }
 };

-} // namespace Tegra::Shader
+} // namespace Tegra::Shader
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -11,7 +11,7 @@ namespace Tegra {

 MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}

-void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> parameters) {
+void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) {
    Reset();
    registers[1] = parameters[0];
    this->parameters = std::move(parameters);
@@ -19,7 +19,7 @@ void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> pa
    // Execute the code until we hit an exit condition.
    bool keep_executing = true;
    while (keep_executing) {
-        keep_executing = Step(code, false);
+        keep_executing = Step(offset, false);
    }

    // Assert the the macro used all the input parameters
@@ -29,7 +29,7 @@ void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> pa
 void MacroInterpreter::Reset() {
    registers = {};
    pc = 0;
-    delayed_pc = boost::none;
+    delayed_pc = {};
    method_address.raw = 0;
    parameters.clear();
    // The next parameter index starts at 1, because $r1 already has the value of the first
@@ -37,17 +37,17 @@ void MacroInterpreter::Reset() {
    next_parameter_index = 1;
 }

-bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {
+bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
    u32 base_address = pc;

-    Opcode opcode = GetOpcode(code);
+    Opcode opcode = GetOpcode(offset);
    pc += 4;

    // Update the program counter if we were delayed
-    if (delayed_pc != boost::none) {
+    if (delayed_pc) {
        ASSERT(is_delay_slot);
        pc = *delayed_pc;
-        delayed_pc = boost::none;
+        delayed_pc = {};
    }

    switch (opcode.operation) {
@@ -108,7 +108,7 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {

            delayed_pc = base_address + opcode.GetBranchTarget();
            // Execute one more instruction due to the delay slot.
-            return Step(code, true);
+            return Step(offset, true);
        }
        break;
    }
@@ -121,17 +121,18 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {
        // Exit has a delay slot, execute the next instruction
        // Note: Executing an exit during a branch delay slot will cause the instruction at the
        // branch target to be executed before exiting.
-        Step(code, true);
+        Step(offset, true);
        return false;
    }

    return true;
 }

-MacroInterpreter::Opcode MacroInterpreter::GetOpcode(const std::vector<u32>& code) const {
+MacroInterpreter::Opcode MacroInterpreter::GetOpcode(u32 offset) const {
+    const auto& macro_memory{maxwell3d.GetMacroMemory()};
    ASSERT((pc % sizeof(u32)) == 0);
-    ASSERT(pc < code.size() * sizeof(u32));
-    return {code[pc / sizeof(u32)]};
+    ASSERT((offset + pc / sizeof(u32)) < macro_memory.size()); // offset: words, pc: bytes
+    return {macro_memory[offset + pc / sizeof(u32)]};
 }

 u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const {
--- a/src/video_core/macro_interpreter.h
+++ b/src/video_core/macro_interpreter.h
@@ -5,8 +5,9 @@
 #pragma once

 #include <array>
+#include <optional>
 #include <vector>
-#include <boost/optional.hpp>
+
 #include "common/bit_field.h"
 #include "common/common_types.h"

@@ -21,10 +22,10 @@ public:

    /**
     * Executes the macro code with the specified input parameters.
-     * @param code The macro byte code to execute
-     * @param parameters The parameters of the macro
+     * @param offset Offset to start execution at.
+     * @param parameters The parameters of the macro.
     */
-    void Execute(const std::vector<u32>& code, std::vector<u32> parameters);
+    void Execute(u32 offset, std::vector<u32> parameters);

 private:
    enum class Operation : u32 {
@@ -109,11 +110,11 @@ private:
    /**
     * Executes a single macro instruction located at the current program counter. Returns whether
     * the interpreter should keep running.
-     * @param code The macro code to execute.
+     * @param offset Offset to start execution at.
     * @param is_delay_slot Whether the current step is being executed due to a delay slot in a
     * previous instruction.
     */
-    bool Step(const std::vector<u32>& code, bool is_delay_slot);
+    bool Step(u32 offset, bool is_delay_slot);

    /// Calculates the result of an ALU operation. src_a OP src_b;
    u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const;
@@ -126,7 +127,7 @@ private:
    bool EvaluateBranchCondition(BranchCondition cond, u32 value) const;

    /// Reads an opcode at the current program counter location.
-    Opcode GetOpcode(const std::vector<u32>& code) const;
+    Opcode GetOpcode(u32 offset) const;

    /// Returns the specified register's value. Register 0 is hardcoded to always return 0.
    u32 GetRegister(u32 register_id) const;
@@ -149,7 +150,7 @@ private:
    Engines::Maxwell3D& maxwell3d;

    u32 pc; ///< Current program counter
-    boost::optional<u32>
+    std::optional<u32>
        delayed_pc; ///< Program counter to execute at after the delay slot is executed.

    static constexpr std::size_t NumMacroRegisters = 8;
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -4,18 +4,21 @@

 #include "common/alignment.h"
 #include "common/assert.h"
+#include "common/logging/log.h"
 #include "video_core/memory_manager.h"

 namespace Tegra {

 GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
-    boost::optional<GPUVAddr> gpu_addr = FindFreeBlock(size, align);
-    ASSERT(gpu_addr);
+    const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)};

-    for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot = PageSlot(*gpu_addr + offset);
+    ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
+
+    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
+        VAddr& slot{PageSlot(*gpu_addr + offset)};

        ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
+
        slot = static_cast<u64>(PageStatus::Allocated);
    }

@@ -23,10 +26,11 @@ GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
 }

 GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
-    for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot = PageSlot(gpu_addr + offset);
+    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
+        VAddr& slot{PageSlot(gpu_addr + offset)};

        ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
+
        slot = static_cast<u64>(PageStatus::Allocated);
    }

@@ -34,17 +38,19 @@ GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
 }

 GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
-    boost::optional<GPUVAddr> gpu_addr = FindFreeBlock(size, PAGE_SIZE);
-    ASSERT(gpu_addr);
+    const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, PAGE_SIZE, PageStatus::Unmapped)};

-    for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot = PageSlot(*gpu_addr + offset);
+    ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
+
+    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
+        VAddr& slot{PageSlot(*gpu_addr + offset)};

        ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
+
        slot = cpu_addr + offset;
    }

-    MappedRegion region{cpu_addr, *gpu_addr, size};
+    const MappedRegion region{cpu_addr, *gpu_addr, size};
    mapped_regions.push_back(region);

    return *gpu_addr;
@@ -53,14 +59,31 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
 GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
    ASSERT((gpu_addr & PAGE_MASK) == 0);

-    for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot = PageSlot(gpu_addr + offset);
+    if (PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Allocated)) {
+        // Page has been already mapped. In this case, we must find a new area of memory to use that
+        // is different than the specified one. Super Mario Odyssey hits this scenario when changing
+        // areas, but we do not want to overwrite the old pages.
+        // TODO(bunnei): We need to write a hardware test to confirm this behavior.
+
+        LOG_ERROR(HW_GPU, "attempting to map addr 0x{:016X}, which is not available!", gpu_addr);
+
+        const std::optional<GPUVAddr> new_gpu_addr{
+            FindFreeBlock(gpu_addr, size, PAGE_SIZE, PageStatus::Allocated)};
+
+        ASSERT_MSG(new_gpu_addr, "unable to find available GPU memory");
+
+        gpu_addr = *new_gpu_addr;
+    }
+
+    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
+        VAddr& slot{PageSlot(gpu_addr + offset)};

        ASSERT(slot == static_cast<u64>(PageStatus::Allocated));
+
        slot = cpu_addr + offset;
    }

-    MappedRegion region{cpu_addr, gpu_addr, size};
+    const MappedRegion region{cpu_addr, gpu_addr, size};
    mapped_regions.push_back(region);

    return gpu_addr;
@@ -69,11 +92,12 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size)
 GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
    ASSERT((gpu_addr & PAGE_MASK) == 0);

-    for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
-        VAddr& slot = PageSlot(gpu_addr + offset);
+    for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
+        VAddr& slot{PageSlot(gpu_addr + offset)};

        ASSERT(slot != static_cast<u64>(PageStatus::Allocated) &&
               slot != static_cast<u64>(PageStatus::Unmapped));
+
        slot = static_cast<u64>(PageStatus::Unmapped);
    }

@@ -97,13 +121,14 @@ GPUVAddr MemoryManager::GetRegionEnd(GPUVAddr region_start) const {
    return {};
 }

-boost::optional<GPUVAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) {
-    GPUVAddr gpu_addr = 0;
-    u64 free_space = 0;
+std::optional<GPUVAddr> MemoryManager::FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
+                                                     PageStatus status) {
+    GPUVAddr gpu_addr{region_start};
+    u64 free_space{};
    align = (align + PAGE_MASK) & ~PAGE_MASK;

    while (gpu_addr + free_space < MAX_ADDRESS) {
-        if (!IsPageMapped(gpu_addr + free_space)) {
+        if (PageSlot(gpu_addr + free_space) == static_cast<u64>(status)) {
            free_space += PAGE_SIZE;
            if (free_space >= size) {
                return gpu_addr;
@@ -118,8 +143,8 @@ boost::optional<GPUVAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) {
    return {};
 }

-boost::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
-    VAddr base_addr = PageSlot(gpu_addr);
+std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
+    const VAddr base_addr{PageSlot(gpu_addr)};

    if (base_addr == static_cast<u64>(PageStatus::Allocated) ||
        base_addr == static_cast<u64>(PageStatus::Unmapped)) {
@@ -133,19 +158,15 @@ std::vector<GPUVAddr> MemoryManager::CpuToGpuAddress(VAddr cpu_addr) const {
    std::vector<GPUVAddr> results;
    for (const auto& region : mapped_regions) {
        if (cpu_addr >= region.cpu_addr && cpu_addr < (region.cpu_addr + region.size)) {
-            u64 offset = cpu_addr - region.cpu_addr;
+            const u64 offset{cpu_addr - region.cpu_addr};
            results.push_back(region.gpu_addr + offset);
        }
    }
    return results;
 }

-bool MemoryManager::IsPageMapped(GPUVAddr gpu_addr) {
-    return PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Unmapped);
-}
-
 VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) {
-    auto& block = page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK];
+    auto& block{page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK]};
    if (!block) {
        block = std::make_unique<PageBlock>();
        block->fill(static_cast<VAddr>(PageStatus::Unmapped));
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -6,10 +6,9 @@

 #include <array>
 #include <memory>
+#include <optional>
 #include <vector>

-#include <boost/optional.hpp>
-
 #include "common/common_types.h"

 namespace Tegra {
@@ -27,7 +26,7 @@ public:
    GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size);
    GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size);
    GPUVAddr GetRegionEnd(GPUVAddr region_start) const;
-    boost::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
+    std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
    std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const;

    static constexpr u64 PAGE_BITS = 16;
@@ -35,15 +34,15 @@ public:
    static constexpr u64 PAGE_MASK = PAGE_SIZE - 1;

 private:
-    boost::optional<GPUVAddr> FindFreeBlock(u64 size, u64 align = 1);
-    bool IsPageMapped(GPUVAddr gpu_addr);
-    VAddr& PageSlot(GPUVAddr gpu_addr);
-
    enum class PageStatus : u64 {
        Unmapped = 0xFFFFFFFFFFFFFFFFULL,
        Allocated = 0xFFFFFFFFFFFFFFFEULL,
    };

+    std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
+                                          PageStatus status);
+    VAddr& PageSlot(GPUVAddr gpu_addr);
+
    static constexpr u64 MAX_ADDRESS{0x10000000000ULL};
    static constexpr u64 PAGE_TABLE_BITS{10};
    static constexpr u64 PAGE_TABLE_SIZE{1 << PAGE_TABLE_BITS};
--- a/src/video_core/rasterizer_cache.cpp
+++ b/src/video_core/rasterizer_cache.cpp
@@ -0,0 +1,7 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/rasterizer_cache.h"
+
+RasterizerCacheObject::~RasterizerCacheObject() = default; // out-of-line destructor definition
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -10,13 +10,13 @@
 #include <boost/range/iterator_range_core.hpp>

 #include "common/common_types.h"
-#include "core/core.h"
 #include "core/settings.h"
 #include "video_core/rasterizer_interface.h"
-#include "video_core/renderer_base.h"

 class RasterizerCacheObject {
 public:
+    virtual ~RasterizerCacheObject();
+
    /// Gets the address of the shader in guest memory, required for cache management
    virtual VAddr GetAddr() const = 0;

@@ -64,6 +64,8 @@ class RasterizerCache : NonCopyable {
    friend class RasterizerCacheObject;

 public:
+    explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
+
    /// Write any cached resources overlapping the specified region back to memory
    void FlushRegion(Tegra::GPUVAddr addr, size_t size) {
        const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@@ -109,14 +111,12 @@ protected:
    void Register(const T& object) {
        object->SetIsRegistered(true);
        object_cache.add({GetInterval(object), ObjectSet{object}});
-        auto& rasterizer = Core::System::GetInstance().Renderer().Rasterizer();
        rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1);
    }

    /// Unregisters an object from the cache
    void Unregister(const T& object) {
        object->SetIsRegistered(false);
-        auto& rasterizer = Core::System::GetInstance().Renderer().Rasterizer();
        rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);

        // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
@@ -177,4 +177,5 @@ private:

    ObjectCache object_cache; ///< Cache of objects
    u64 modified_ticks{};     ///< Counter of cache state ticks, used for in-order flushing
+    VideoCore::RasterizerInterface& rasterizer;
 };
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -6,7 +6,8 @@

 #include <atomic>
 #include <memory>
-#include <boost/optional.hpp>
+#include <optional>
+
 #include "common/common_types.h"
 #include "video_core/gpu.h"
 #include "video_core/rasterizer_interface.h"
@@ -28,7 +29,8 @@ public:
    virtual ~RendererBase();

    /// Swap buffers (render frame)
-    virtual void SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) = 0;
+    virtual void SwapBuffers(
+        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;

    /// Initialize the renderer
    virtual bool Init() = 0;
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -9,15 +9,17 @@
 #include "core/core.h"
 #include "core/memory.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_rasterizer.h"

 namespace OpenGL {

-OGLBufferCache::OGLBufferCache(std::size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {}
+OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
+    : RasterizerCache{rasterizer}, stream_buffer(GL_ARRAY_BUFFER, size) {}

 GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
                                      std::size_t alignment, bool cache) {
    auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
-    const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
+    const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};

    // Cache management is a big overhead, so only cache entries with a given size.
    // TODO: Figure out which size is the best for given games.
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -15,6 +15,8 @@

 namespace OpenGL {

+class RasterizerOpenGL;
+
 struct CachedBufferEntry final : public RasterizerCacheObject {
    VAddr GetAddr() const override {
        return addr;
@@ -35,7 +37,7 @@ struct CachedBufferEntry final : public RasterizerCacheObject {

 class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
 public:
-    explicit OGLBufferCache(std::size_t size);
+    explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size);

    /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
    /// allocated.
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
@@ -6,6 +6,7 @@
 #include <array>
 #include "common/assert.h"
 #include "common/common_types.h"
+#include "core/core.h"
 #include "core/memory.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_primitive_assembler.h"
@@ -45,7 +46,7 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
    auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);

    auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
-    const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
+    const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
    const u8* source{Memory::GetPointer(*cpu_addr)};

    for (u32 primitive = 0; primitive < count / 4; ++primitive) {
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -30,10 +30,11 @@
 namespace OpenGL {

 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using PixelFormat = SurfaceParams::PixelFormat;
-using SurfaceType = SurfaceParams::SurfaceType;
+using PixelFormat = VideoCore::Surface::PixelFormat;
+using SurfaceType = VideoCore::Surface::SurfaceType;

-MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
 MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192));
 MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192));
 MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192));
@@ -79,7 +80,8 @@ struct DrawParameters {
 };

 RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
-    : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) {
+    : res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info},
+      buffer_cache(*this, STREAM_BUFFER_SIZE) {
    // Create sampler objects
    for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
        texture_samplers[i].Create();
@@ -104,7 +106,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
    }

    ASSERT_MSG(has_ARB_separate_shader_objects, "has_ARB_separate_shader_objects is unsupported");
-
+    OpenGLState::ApplyDefaultState();
    // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
    state.clip_distance[0] = true;

@@ -115,8 +117,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
    state.draw.shader_program = 0;
    state.Apply();

-    glEnable(GL_BLEND);
-
    glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);

    LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!");
@@ -124,18 +124,23 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo

 RasterizerOpenGL::~RasterizerOpenGL() {}

-void RasterizerOpenGL::SetupVertexArrays() {
-    MICROPROFILE_SCOPE(OpenGL_VAO);
-    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+void RasterizerOpenGL::SetupVertexFormat() {
+    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
    const auto& regs = gpu.regs;

+    if (!gpu.dirty_flags.vertex_attrib_format)
+        return;
+    gpu.dirty_flags.vertex_attrib_format = false;
+
+    MICROPROFILE_SCOPE(OpenGL_VAO);
+
    auto [iter, is_cache_miss] = vertex_array_cache.try_emplace(regs.vertex_attrib_format);
    auto& VAO = iter->second;

    if (is_cache_miss) {
        VAO.Create();
        state.draw.vertex_array = VAO.handle;
-        state.Apply();
+        state.ApplyVertexBufferState();

        // The index buffer binding is stored within the VAO. Stupid OpenGL, but easy to work
        // around.
@@ -177,8 +182,13 @@ void RasterizerOpenGL::SetupVertexArrays() {
        }
    }
    state.draw.vertex_array = VAO.handle;
-    state.draw.vertex_buffer = buffer_cache.GetHandle();
-    state.Apply();
+    state.ApplyVertexBufferState();
+}
+
+void RasterizerOpenGL::SetupVertexBuffer() {
+    MICROPROFILE_SCOPE(OpenGL_VB);
+    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    const auto& regs = gpu.regs;

    // Upload all guest vertex arrays sequentially to our buffer
    for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
@@ -205,6 +215,9 @@ void RasterizerOpenGL::SetupVertexArrays() {
            glVertexBindingDivisor(index, 0);
        }
    }
+
+    // Implicitly set by glBindVertexBuffer. Stupid glstate handling...
+    state.draw.vertex_buffer = buffer_cache.GetHandle();
 }

 DrawParameters RasterizerOpenGL::SetupDraw() {
@@ -329,8 +342,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
            index++;
        }
    }
-
-    state.Apply();
 }

 std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -399,9 +410,9 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
        cached_pages.add({pages_interval, delta});
 }

-void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb,
-                                             bool preserve_contents,
-                                             boost::optional<std::size_t> single_color_target) {
+void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool using_color_fb,
+                                             bool using_depth_fb, bool preserve_contents,
+                                             std::optional<std::size_t> single_color_target) {
    MICROPROFILE_SCOPE(OpenGL_Framebuffer);
    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;

@@ -416,8 +427,9 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
    ASSERT_MSG(regs.rt_separate_frag_data == 0, "Unimplemented");

    // Bind the framebuffer surfaces
-    state.draw.draw_framebuffer = framebuffer.handle;
-    state.Apply();
+    current_state.draw.draw_framebuffer = framebuffer.handle;
+    current_state.ApplyFramebufferState();
+    current_state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;

    if (using_color_fb) {
        if (single_color_target) {
@@ -429,6 +441,9 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
                // Assume that a surface will be written to if it is used as a framebuffer, even if
                // the shader doesn't actually write to it.
                color_surface->MarkAsModified(true, res_cache);
+                // Workaround for an issue in NVIDIA drivers
+                // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/
+                state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion;
            }

            glFramebufferTexture2D(
@@ -446,6 +461,11 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
                    // Assume that a surface will be written to if it is used as a framebuffer, even
                    // if the shader doesn't actually write to it.
                    color_surface->MarkAsModified(true, res_cache);
+                    // Enable sRGB only for supported formats
+                    // Workaround for an issue in NVIDIA drivers
+                    // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/
+                    state.framebuffer_srgb.enabled |=
+                        color_surface->GetSurfaceParams().srgb_conversion;
                }

                buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
@@ -487,10 +507,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
                               0);
    }
-
-    SyncViewport();
-
-    state.Apply();
+    SyncViewport(current_state);
 }

 void RasterizerOpenGL::Clear() {
@@ -503,22 +520,23 @@ void RasterizerOpenGL::Clear() {
    bool use_stencil{};

    OpenGLState clear_state;
-    clear_state.draw.draw_framebuffer = framebuffer.handle;
-    clear_state.color_mask.red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE;
-    clear_state.color_mask.green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE;
-    clear_state.color_mask.blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
-    clear_state.color_mask.alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE;
-
    if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
        regs.clear_buffers.A) {
        use_color = true;
    }
+    if (use_color) {
+        clear_state.color_mask[0].red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE;
+        clear_state.color_mask[0].green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE;
+        clear_state.color_mask[0].blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
+        clear_state.color_mask[0].alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE;
+    }
    if (regs.clear_buffers.Z) {
        ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!");
        use_depth = true;

        // Always enable the depth write when clearing the depth buffer. The depth write mask is
-        // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true.
+        // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to
+        // true.
        clear_state.depth.test_enabled = true;
        clear_state.depth.test_func = GL_ALWAYS;
    }
@@ -535,9 +553,8 @@ void RasterizerOpenGL::Clear() {

    ScopeAcquireGLContext acquire_context{emu_window};

-    ConfigureFramebuffers(use_color, use_depth || use_stencil, false,
+    ConfigureFramebuffers(clear_state, use_color, use_depth || use_stencil, false,
                          regs.clear_buffers.RT.Value());
-
    clear_state.Apply();

    if (use_color) {
@@ -563,14 +580,14 @@ void RasterizerOpenGL::DrawArrays() {

    ScopeAcquireGLContext acquire_context{emu_window};

-    ConfigureFramebuffers();
-
+    ConfigureFramebuffers(state);
+    SyncColorMask();
    SyncDepthTestState();
    SyncStencilTestState();
    SyncBlendState();
    SyncLogicOpState();
    SyncCullMode();
-    SyncDepthRange();
+    SyncPrimitiveRestart();
    SyncScissorTest();
    // Alpha Testing is synced on shaders.
    SyncTransformFeedback();
@@ -584,7 +601,7 @@ void RasterizerOpenGL::DrawArrays() {
    const bool is_indexed = accelerate_draw == AccelDraw::Indexed;

    state.draw.vertex_buffer = buffer_cache.GetHandle();
-    state.Apply();
+    state.ApplyVertexBufferState();

    std::size_t buffer_size = CalculateVertexArraysSize();

@@ -611,7 +628,8 @@ void RasterizerOpenGL::DrawArrays() {

    buffer_cache.Map(buffer_size);

-    SetupVertexArrays();
+    SetupVertexFormat();
+    SetupVertexBuffer();
    DrawParameters params = SetupDraw();
    SetupShaders(params.primitive_mode);

@@ -691,7 +709,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,

    // Verify that the cached surface is the same size and format as the requested framebuffer
    const auto& params{surface->GetSurfaceParams()};
-    const auto& pixel_format{SurfaceParams::PixelFormatFromGPUPixelFormat(config.pixel_format)};
+    const auto& pixel_format{
+        VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
    ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
    ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
    ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different");
@@ -714,16 +733,20 @@ void RasterizerOpenGL::SamplerInfo::Create() {
    glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER);
 }

-void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) {
+void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::FullTextureInfo& info) {
    const GLuint s = sampler.handle;
-
+    const Tegra::Texture::TSCEntry& config = info.tsc;
    if (mag_filter != config.mag_filter) {
        mag_filter = config.mag_filter;
-        glSamplerParameteri(s, GL_TEXTURE_MAG_FILTER, MaxwellToGL::TextureFilterMode(mag_filter));
+        glSamplerParameteri(
+            s, GL_TEXTURE_MAG_FILTER,
+            MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None));
    }
-    if (min_filter != config.min_filter) {
+    if (min_filter != config.min_filter || mip_filter != config.mip_filter) {
        min_filter = config.min_filter;
-        glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER, MaxwellToGL::TextureFilterMode(min_filter));
+        mip_filter = config.mip_filter;
+        glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER,
+                            MaxwellToGL::TextureFilterMode(min_filter, mip_filter));
    }

    if (wrap_u != config.wrap_u) {
@@ -763,6 +786,22 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
            glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, border_color.data());
        }
    }
+    if (info.tic.use_header_opt_control == 0) {
+        if (GLAD_GL_ARB_texture_filter_anisotropic) {
+            glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY,
+                                static_cast<float>(1 << info.tic.max_anisotropy.Value()));
+        } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
+            glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY_EXT,
+                                static_cast<float>(1 << info.tic.max_anisotropy.Value()));
+        }
+        glSamplerParameterf(s, GL_TEXTURE_MIN_LOD,
+                            static_cast<float>(info.tic.res_min_mip_level.Value()));
+        glSamplerParameterf(s, GL_TEXTURE_MAX_LOD,
+                            static_cast<float>(info.tic.res_max_mip_level.Value() == 0
+                                                   ? 16
+                                                   : info.tic.res_max_mip_level.Value()));
+        glSamplerParameterf(s, GL_TEXTURE_LOD_BIAS, info.tic.mip_lod_bias.Value() / 256.f);
+    }
 }

 u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shader,
@@ -860,7 +899,7 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
            continue;
        }

-        texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
+        texture_samplers[current_bindpoint].SyncWithConfig(texture);
        Surface surface = res_cache.GetTextureSurface(texture, entry);
        if (surface != nullptr) {
            state.texture_units[current_bindpoint].texture = surface->Texture().handle;
@@ -882,14 +921,18 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
    return current_unit + static_cast<u32>(entries.size());
 }

-void RasterizerOpenGL::SyncViewport() {
+void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { // fills current_state.viewports
    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
-    const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
-
-    state.viewport.x = viewport_rect.left;
-    state.viewport.y = viewport_rect.bottom;
-    state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth());
-    state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight());
+    for (size_t slot = 0; slot < Maxwell::NumRenderTargets; ++slot) {
+        const MathUtil::Rectangle<s32> rect{regs.viewport_transform[slot].GetRect()};
+        auto& target = current_state.viewports[slot];
+        target.depth_range_near = regs.viewport[slot].depth_range_near;
+        target.depth_range_far = regs.viewport[slot].depth_range_far;
+        target.x = rect.left;
+        target.y = rect.bottom; // y origin is taken from the rectangle's bottom edge
+        target.width = static_cast<GLfloat>(rect.GetWidth());
+        target.height = static_cast<GLfloat>(rect.GetHeight());
+    }
 }

 void RasterizerOpenGL::SyncClipEnabled() {
@@ -924,11 +967,11 @@ void RasterizerOpenGL::SyncCullMode() {
    }
 }

-void RasterizerOpenGL::SyncDepthRange() {
+void RasterizerOpenGL::SyncPrimitiveRestart() { // mirrors regs.primitive_restart into state
    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;

-    state.depth.depth_range_near = regs.viewport->depth_range_near;
-    state.depth.depth_range_far = regs.viewport->depth_range_far;
+    state.primitive_restart.enabled = regs.primitive_restart.enabled; // no GL call is made here
+    state.primitive_restart.index = regs.primitive_restart.index;
 }

 void RasterizerOpenGL::SyncDepthTestState() {
@@ -951,9 +994,6 @@ void RasterizerOpenGL::SyncStencilTestState() {
        return;
    }

-    // TODO(bunnei): Verify behavior when this is not set
-    ASSERT(regs.stencil_two_side_enable);
-
    state.stencil.front.test_func = MaxwellToGL::ComparisonOp(regs.stencil_front_func_func);
    state.stencil.front.test_ref = regs.stencil_front_func_ref;
    state.stencil.front.test_mask = regs.stencil_front_func_mask;
@@ -961,36 +1001,79 @@ void RasterizerOpenGL::SyncStencilTestState() {
    state.stencil.front.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_zfail);
    state.stencil.front.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_front_op_zpass);
    state.stencil.front.write_mask = regs.stencil_front_mask;
+    if (regs.stencil_two_side_enable) {
+        state.stencil.back.test_func = MaxwellToGL::ComparisonOp(regs.stencil_back_func_func);
+        state.stencil.back.test_ref = regs.stencil_back_func_ref;
+        state.stencil.back.test_mask = regs.stencil_back_func_mask;
+        state.stencil.back.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_fail);
+        state.stencil.back.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_zfail);
+        state.stencil.back.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_back_op_zpass);
+        state.stencil.back.write_mask = regs.stencil_back_mask;
+    } else {
+        state.stencil.back.test_func = GL_ALWAYS;
+        state.stencil.back.test_ref = 0;
+        state.stencil.back.test_mask = 0xFFFFFFFF;
+        state.stencil.back.write_mask = 0xFFFFFFFF;
+        state.stencil.back.action_stencil_fail = GL_KEEP;
+        state.stencil.back.action_depth_fail = GL_KEEP;
+        state.stencil.back.action_depth_pass = GL_KEEP;
+    }
+}

-    state.stencil.back.test_func = MaxwellToGL::ComparisonOp(regs.stencil_back_func_func);
-    state.stencil.back.test_ref = regs.stencil_back_func_ref;
-    state.stencil.back.test_mask = regs.stencil_back_func_mask;
-    state.stencil.back.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_fail);
-    state.stencil.back.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_zfail);
-    state.stencil.back.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_back_op_zpass);
-    state.stencil.back.write_mask = regs.stencil_back_mask;
+void RasterizerOpenGL::SyncColorMask() {
+    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
+        const auto& source = regs.color_mask[regs.color_mask_common ? 0 : i];
+        auto& dest = state.color_mask[i];
+        dest.red_enabled = (source.R == 0) ? GL_FALSE : GL_TRUE;
+        dest.green_enabled = (source.G == 0) ? GL_FALSE : GL_TRUE;
+        dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;
+        dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
+    }
 }

 void RasterizerOpenGL::SyncBlendState() {
    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;

-    // TODO(Subv): Support more than just render target 0.
-    state.blend.enabled = regs.blend.enable[0] != 0;
+    state.blend_color.red = regs.blend_color.r;
+    state.blend_color.green = regs.blend_color.g;
+    state.blend_color.blue = regs.blend_color.b;
+    state.blend_color.alpha = regs.blend_color.a;

-    if (!state.blend.enabled)
+    state.independant_blend.enabled = regs.independent_blend_enable;
+    if (!state.independant_blend.enabled) {
+        auto& blend = state.blend[0];
+        blend.enabled = regs.blend.enable[0] != 0;
+        blend.separate_alpha = regs.blend.separate_alpha;
+        blend.rgb_equation = MaxwellToGL::BlendEquation(regs.blend.equation_rgb);
+        blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb);
+        blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb);
+        if (blend.separate_alpha) {
+            blend.a_equation = MaxwellToGL::BlendEquation(regs.blend.equation_a);
+            blend.src_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_a);
+            blend.dst_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_a);
+        }
+        for (size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
+            state.blend[i].enabled = false;
+        }
        return;
+    }

-    ASSERT_MSG(regs.logic_op.enable == 0,
-               "Blending and logic op can't be enabled at the same time.");
-
-    ASSERT_MSG(regs.independent_blend_enable == 1, "Only independent blending is implemented");
-    ASSERT_MSG(!regs.independent_blend[0].separate_alpha, "Unimplemented");
-    state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_rgb);
-    state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_rgb);
-    state.blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_dest_rgb);
-    state.blend.a_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_a);
-    state.blend.src_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_a);
-    state.blend.dst_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_dest_a);
+    for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
+        auto& blend = state.blend[i];
+        blend.enabled = regs.blend.enable[i] != 0;
+        if (!blend.enabled)
+            continue;
+        blend.separate_alpha = regs.independent_blend[i].separate_alpha;
+        blend.rgb_equation = MaxwellToGL::BlendEquation(regs.independent_blend[i].equation_rgb);
+        blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[i].factor_source_rgb);
+        blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[i].factor_dest_rgb);
+        if (blend.separate_alpha) {
+            blend.a_equation = MaxwellToGL::BlendEquation(regs.independent_blend[i].equation_a);
+            blend.src_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[i].factor_source_a);
+            blend.dst_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[i].factor_dest_a);
+        }
+    }
 }

 void RasterizerOpenGL::SyncLogicOpState() {
@@ -1009,19 +1092,19 @@ void RasterizerOpenGL::SyncLogicOpState() {
 }

 void RasterizerOpenGL::SyncScissorTest() {
+    // TODO: what is the correct behavior here, a single scissor for all targets
+    // or scissor disabled for the rest of the targets?
    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
-
    state.scissor.enabled = (regs.scissor_test.enable != 0);
-    // TODO(Blinkhawk): Figure if the hardware supports scissor testing per viewport and how it's
-    // implemented.
-    if (regs.scissor_test.enable != 0) {
-        const u32 width = regs.scissor_test.max_x - regs.scissor_test.min_x;
-        const u32 height = regs.scissor_test.max_y - regs.scissor_test.min_y;
-        state.scissor.x = regs.scissor_test.min_x;
-        state.scissor.y = regs.scissor_test.min_y;
-        state.scissor.width = width;
-        state.scissor.height = height;
+    if (regs.scissor_test.enable == 0) {
+        return;
    }
+    const u32 width = regs.scissor_test.max_x - regs.scissor_test.min_x;
+    const u32 height = regs.scissor_test.max_y - regs.scissor_test.min_y;
+    state.scissor.x = regs.scissor_test.min_x;
+    state.scissor.y = regs.scissor_test.min_y;
+    state.scissor.width = width;
+    state.scissor.height = height;
 }

 void RasterizerOpenGL::SyncTransformFeedback() {
@@ -1046,9 +1129,8 @@ void RasterizerOpenGL::CheckAlphaTests() {
    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;

    if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) {
-        LOG_CRITICAL(
-            Render_OpenGL,
-            "Alpha Testing is enabled with Multiple Render Targets, this behavior is undefined.");
+        LOG_CRITICAL(Render_OpenGL, "Alpha Testing is enabled with Multiple Render Targets, "
+                                    "this behavior is undefined.");
        UNREACHABLE();
    }
 }
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -8,12 +8,12 @@
 #include <cstddef>
 #include <map>
 #include <memory>
+#include <optional>
 #include <tuple>
 #include <utility>
 #include <vector>

 #include <boost/icl/interval_map.hpp>
-#include <boost/optional.hpp>
 #include <boost/range/iterator_range.hpp>
 #include <glad/glad.h>

@@ -88,11 +88,12 @@ private:
        /// SamplerInfo struct.
        void Create();
        /// Syncs the sampler object with the config, updating any necessary state.
-        void SyncWithConfig(const Tegra::Texture::TSCEntry& config);
+        void SyncWithConfig(const Tegra::Texture::FullTextureInfo& info);

    private:
        Tegra::Texture::TextureFilter mag_filter;
        Tegra::Texture::TextureFilter min_filter;
+        Tegra::Texture::TextureMipmapFilter mip_filter;
        Tegra::Texture::WrapMode wrap_u;
        Tegra::Texture::WrapMode wrap_v;
        Tegra::Texture::WrapMode wrap_p;
@@ -108,9 +109,9 @@ private:
     * @param preserve_contents If true, tries to preserve data from a previously used framebuffer.
     * @param single_color_target Specifies if a single color buffer target should be used.
     */
-    void ConfigureFramebuffers(bool use_color_fb = true, bool using_depth_fb = true,
-                               bool preserve_contents = true,
-                               boost::optional<std::size_t> single_color_target = {});
+    void ConfigureFramebuffers(OpenGLState& current_state, bool use_color_fb = true,
+                               bool using_depth_fb = true, bool preserve_contents = true,
+                               std::optional<std::size_t> single_color_target = {});

    /*
     * Configures the current constbuffers to use for the draw command.
@@ -132,8 +133,8 @@ private:
    u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
                      GLenum primitive_mode, u32 current_unit);

-    /// Syncs the viewport to match the guest state
-    void SyncViewport();
+    /// Syncs the viewport and depth range to match the guest state
+    void SyncViewport(OpenGLState& current_state);

    /// Syncs the clip enabled status to match the guest state
    void SyncClipEnabled();
@@ -144,8 +145,8 @@ private:
    /// Syncs the cull mode to match the guest state
    void SyncCullMode();

-    /// Syncs the depth range to match the guest state
-    void SyncDepthRange();
+    /// Syncs the primitive restart to match the guest state
+    void SyncPrimitiveRestart();

    /// Syncs the depth test state to match the guest state
    void SyncDepthTestState();
@@ -168,6 +169,9 @@ private:
    /// Syncs the point state to match the guest state
    void SyncPointState();

+    /// Syncs the color mask to match the guest state
+    void SyncColorMask();
+
    /// Check asserts for alpha testing.
    void CheckAlphaTests();

@@ -203,7 +207,8 @@ private:

    std::size_t CalculateIndexBufferSize() const;

-    void SetupVertexArrays();
+    void SetupVertexFormat();
+    void SetupVertexBuffer();

    DrawParameters SetupDraw();

--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -15,16 +15,24 @@
 #include "core/memory.h"
 #include "core/settings.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
+#include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/renderer_opengl/utils.h"
+#include "video_core/surface.h"
 #include "video_core/textures/astc.h"
 #include "video_core/textures/decoders.h"
 #include "video_core/utils.h"

 namespace OpenGL {

-using SurfaceType = SurfaceParams::SurfaceType;
-using PixelFormat = SurfaceParams::PixelFormat;
-using ComponentType = SurfaceParams::ComponentType;
+using VideoCore::Surface::ComponentTypeFromDepthFormat;
+using VideoCore::Surface::ComponentTypeFromRenderTarget;
+using VideoCore::Surface::ComponentTypeFromTexture;
+using VideoCore::Surface::PixelFormatFromDepthFormat;
+using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
+using VideoCore::Surface::PixelFormatFromTextureFormat;
+using VideoCore::Surface::SurfaceTargetFromTextureType;

 struct FormatTuple {
    GLint internal_format;
@@ -34,34 +42,6 @@ struct FormatTuple {
    bool compressed;
 };

-static bool IsPixelFormatASTC(PixelFormat format) {
-    switch (format) {
-    case PixelFormat::ASTC_2D_4X4:
-    case PixelFormat::ASTC_2D_5X4:
-    case PixelFormat::ASTC_2D_8X8:
-    case PixelFormat::ASTC_2D_8X5:
-        return true;
-    default:
-        return false;
-    }
-}
-
-static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
-    switch (format) {
-    case PixelFormat::ASTC_2D_4X4:
-        return {4, 4};
-    case PixelFormat::ASTC_2D_5X4:
-        return {5, 4};
-    case PixelFormat::ASTC_2D_8X8:
-        return {8, 8};
-    case PixelFormat::ASTC_2D_8X5:
-        return {8, 5};
-    default:
-        LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format));
-        UNREACHABLE();
-    }
-}
-
 void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
    const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)};
@@ -78,27 +58,34 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
    }
 }

-std::size_t SurfaceParams::InnerMemorySize(bool layer_only) const {
-    const u32 compression_factor{GetCompressionFactor(pixel_format)};
+std::size_t SurfaceParams::InnerMipmapMemorySize(u32 mip_level, bool force_gl, bool layer_only,
+                                                 bool uncompressed) const {
+    const u32 tile_x{GetDefaultBlockWidth(pixel_format)};
+    const u32 tile_y{GetDefaultBlockHeight(pixel_format)};
    const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)};
    u32 m_depth = (layer_only ? 1U : depth);
-    u32 m_width = std::max(1U, width / compression_factor);
-    u32 m_height = std::max(1U, height / compression_factor);
-    std::size_t size = Tegra::Texture::CalculateSize(is_tiled, bytes_per_pixel, m_width, m_height,
-                                                     m_depth, block_height, block_depth);
-    u32 m_block_height = block_height;
-    u32 m_block_depth = block_depth;
-    std::size_t block_size_bytes = 512 * block_height * block_depth; // 512 is GOB size
-    for (u32 i = 1; i < max_mip_level; i++) {
-        m_width = std::max(1U, m_width / 2);
-        m_height = std::max(1U, m_height / 2);
-        m_depth = std::max(1U, m_depth / 2);
-        m_block_height = std::max(1U, m_block_height / 2);
-        m_block_depth = std::max(1U, m_block_depth / 2);
-        size += Tegra::Texture::CalculateSize(is_tiled, bytes_per_pixel, m_width, m_height, m_depth,
-                                              m_block_height, m_block_depth);
+    u32 m_width = MipWidth(mip_level);
+    u32 m_height = MipHeight(mip_level);
+    m_width = uncompressed ? m_width : std::max(1U, (m_width + tile_x - 1) / tile_x);
+    m_height = uncompressed ? m_height : std::max(1U, (m_height + tile_y - 1) / tile_y);
+    m_depth = std::max(1U, m_depth >> mip_level);
+    u32 m_block_height = MipBlockHeight(mip_level);
+    u32 m_block_depth = MipBlockDepth(mip_level);
+    return Tegra::Texture::CalculateSize(force_gl ? false : is_tiled, bytes_per_pixel, m_width,
+                                         m_height, m_depth, m_block_height, m_block_depth);
+}
+
+std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
+                                           bool uncompressed) const {
+    std::size_t block_size_bytes = Tegra::Texture::GetGOBSize() * block_height * block_depth;
+    std::size_t size = 0;
+    for (u32 i = 0; i < max_mip_level; i++) {
+        size += InnerMipmapMemorySize(i, force_gl, layer_only, uncompressed);
    }
-    return is_tiled ? Common::AlignUp(size, block_size_bytes) : size;
+    if (!force_gl && is_tiled) {
+        size = Common::AlignUp(size, block_size_bytes);
+    }
+    return size;
 }

 /*static*/ SurfaceParams SurfaceParams::CreateForTexture(
@@ -108,8 +95,9 @@ std::size_t SurfaceParams::InnerMemorySize(bool layer_only) const {
    params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0,
    params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
    params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0,
-    params.pixel_format =
-        PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value());
+    params.srgb_conversion = config.tic.IsSrgbConversionEnabled();
+    params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(),
+                                                       params.srgb_conversion);
    params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
    params.type = GetFormatType(params.pixel_format);
    params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
@@ -140,6 +128,13 @@ std::size_t SurfaceParams::InnerMemorySize(bool layer_only) const {
            params.target = SurfaceTarget::Texture2D;
        }
        break;
+    case SurfaceTarget::TextureCubeArray:
+        params.depth = config.tic.Depth() * 6;
+        if (!entry.IsArray()) {
+            ASSERT(params.depth == 6);
+            params.target = SurfaceTarget::TextureCubemap;
+        }
+        break;
    default:
        LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast<u32>(params.target));
        UNREACHABLE();
@@ -166,6 +161,8 @@ std::size_t SurfaceParams::InnerMemorySize(bool layer_only) const {
    params.block_height = 1 << config.memory_layout.block_height;
    params.block_depth = 1 << config.memory_layout.block_depth;
    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
+    params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
+                             config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
    params.component_type = ComponentTypeFromRenderTarget(config.format);
    params.type = GetFormatType(params.pixel_format);
    params.width = config.width;
@@ -173,7 +170,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool layer_only) const {
    params.unaligned_height = config.height;
    params.target = SurfaceTarget::Texture2D;
    params.depth = 1;
-    params.max_mip_level = 0;
+    params.max_mip_level = 1;
    params.is_layered = false;

    // Render target specific parameters, not used for caching
@@ -201,12 +198,13 @@ std::size_t SurfaceParams::InnerMemorySize(bool layer_only) const {
    params.pixel_format = PixelFormatFromDepthFormat(format);
    params.component_type = ComponentTypeFromDepthFormat(format);
    params.type = GetFormatType(params.pixel_format);
+    params.srgb_conversion = false;
    params.width = zeta_width;
    params.height = zeta_height;
    params.unaligned_height = zeta_height;
    params.target = SurfaceTarget::Texture2D;
    params.depth = 1;
-    params.max_mip_level = 0;
+    params.max_mip_level = 1;
    params.is_layered = false;
    params.rt = {};

@@ -224,6 +222,8 @@ std::size_t SurfaceParams::InnerMemorySize(bool layer_only) const {
    params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0,
    params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0,
    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
+    params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
+                             config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
    params.component_type = ComponentTypeFromRenderTarget(config.format);
    params.type = GetFormatType(params.pixel_format);
    params.width = config.width;
@@ -231,7 +231,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool layer_only) const {
    params.unaligned_height = config.height;
    params.target = SurfaceTarget::Texture2D;
    params.depth = 1;
-    params.max_mip_level = 0;
+    params.max_mip_level = 1;
    params.rt = {};

    params.InitCacheParameters(config.Address());
@@ -239,7 +239,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool layer_only) const {
    return params;
 }

-static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
+static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U
    {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false},                     // ABGR8S
    {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false},   // ABGR8UI
@@ -255,7 +255,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
    {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
     false},                                                                     // R11FG11FB10F
    {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI
-    {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
+    {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
     true}, // DXT1
    {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
     true}, // DXT23
@@ -289,14 +289,33 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
    {GL_RG16I, GL_RG_INTEGER, GL_SHORT, ComponentType::SInt, false},           // RG16I
    {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false},             // RG16S
    {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false},                // RGB32F
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // SRGBA8
-    {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},                       // RG8U
-    {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false},                                // RG8S
-    {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false},              // RG32UI
-    {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false},              // R32UI
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm,
+     false},                                                                   // RGBA8_SRGB
+    {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},            // RG8U
+    {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false},                     // RG8S
+    {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false},   // RG32UI
+    {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false},   // R32UI
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},        // ASTC_2D_8X8
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},        // ASTC_2D_8X5
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},        // ASTC_2D_5X4
+    {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
+    // Compressed sRGB formats
+    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
+     true}, // DXT1_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
+     true}, // DXT23_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
+     true}, // DXT45_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8,
+     ComponentType::UNorm, true},                                              // BC7U_SRGB
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},        // ASTC_2D_5X5
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},        // ASTC_2D_10X8
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB

    // Depth formats
    {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
@@ -312,20 +331,22 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
     ComponentType::Float, false}, // Z32FS8
 }};

-static GLenum SurfaceTargetToGL(SurfaceParams::SurfaceTarget target) {
+static GLenum SurfaceTargetToGL(SurfaceTarget target) {
    switch (target) {
-    case SurfaceParams::SurfaceTarget::Texture1D:
+    case SurfaceTarget::Texture1D:
        return GL_TEXTURE_1D;
-    case SurfaceParams::SurfaceTarget::Texture2D:
+    case SurfaceTarget::Texture2D:
        return GL_TEXTURE_2D;
-    case SurfaceParams::SurfaceTarget::Texture3D:
+    case SurfaceTarget::Texture3D:
        return GL_TEXTURE_3D;
-    case SurfaceParams::SurfaceTarget::Texture1DArray:
+    case SurfaceTarget::Texture1DArray:
        return GL_TEXTURE_1D_ARRAY;
-    case SurfaceParams::SurfaceTarget::Texture2DArray:
+    case SurfaceTarget::Texture2DArray:
        return GL_TEXTURE_2D_ARRAY;
-    case SurfaceParams::SurfaceTarget::TextureCubemap:
+    case SurfaceTarget::TextureCubemap:
        return GL_TEXTURE_CUBE_MAP;
+    case SurfaceTarget::TextureCubeArray:
+        return GL_TEXTURE_CUBE_MAP_ARRAY_ARB;
    }
    LOG_CRITICAL(Render_OpenGL, "Unimplemented texture target={}", static_cast<u32>(target));
    UNREACHABLE();
@@ -340,55 +361,41 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
    return format;
 }

-MathUtil::Rectangle<u32> SurfaceParams::GetRect() const {
-    u32 actual_height{unaligned_height};
+MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
+    u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
    if (IsPixelFormatASTC(pixel_format)) {
        // ASTC formats must stop at the ATSC block size boundary
        actual_height = Common::AlignDown(actual_height, GetASTCBlockSize(pixel_format).second);
    }
-    return {0, actual_height, width, 0};
-}
-
-/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN
-static bool IsFormatBCn(PixelFormat format) {
-    switch (format) {
-    case PixelFormat::DXT1:
-    case PixelFormat::DXT23:
-    case PixelFormat::DXT45:
-    case PixelFormat::DXN1:
-    case PixelFormat::DXN2SNORM:
-    case PixelFormat::DXN2UNORM:
-    case PixelFormat::BC7U:
-    case PixelFormat::BC6H_UF16:
-    case PixelFormat::BC6H_SF16:
-        return true;
-    }
-    return false;
+    return {0, actual_height, MipWidth(mip_level), 0};
 }

 template <bool morton_to_gl, PixelFormat format>
 void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, u8* gl_buffer,
                std::size_t gl_buffer_size, VAddr addr) {
-    constexpr u32 bytes_per_pixel = SurfaceParams::GetBytesPerPixel(format);
+    constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);

    // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
    // pixel values.
-    const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
+    const u32 tile_size_x{GetDefaultBlockWidth(format)};
+    const u32 tile_size_y{GetDefaultBlockHeight(format)};

    if (morton_to_gl) {
-        const std::vector<u8> data = Tegra::Texture::UnswizzleTexture(
-            addr, tile_size, bytes_per_pixel, stride, height, depth, block_height, block_depth);
+        const std::vector<u8> data =
+            Tegra::Texture::UnswizzleTexture(addr, tile_size_x, tile_size_y, bytes_per_pixel,
+                                             stride, height, depth, block_height, block_depth);
        const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())};
        memcpy(gl_buffer, data.data(), size_to_copy);
    } else {
-        Tegra::Texture::CopySwizzledData(stride / tile_size, height / tile_size, depth,
+        Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
+                                         (height + tile_size_y - 1) / tile_size_y, depth,
                                         bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr),
                                         gl_buffer, false, block_height, block_depth);
    }
 }

 using GLConversionArray = std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr),
-                                     SurfaceParams::MaxPixelFormat>;
+                                     VideoCore::Surface::MaxPixelFormat>;

 static constexpr GLConversionArray morton_to_gl_fns = {
    // clang-format off
@@ -432,7 +439,7 @@ static constexpr GLConversionArray morton_to_gl_fns = {
        MortonCopy<true, PixelFormat::RG16I>,
        MortonCopy<true, PixelFormat::RG16S>,
        MortonCopy<true, PixelFormat::RGB32F>,
-        MortonCopy<true, PixelFormat::SRGBA8>,
+        MortonCopy<true, PixelFormat::RGBA8_SRGB>,
        MortonCopy<true, PixelFormat::RG8U>,
        MortonCopy<true, PixelFormat::RG8S>,
        MortonCopy<true, PixelFormat::RG32UI>,
@@ -440,6 +447,19 @@ static constexpr GLConversionArray morton_to_gl_fns = {
        MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
        MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
        MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
+        MortonCopy<true, PixelFormat::BGRA8_SRGB>,
+        MortonCopy<true, PixelFormat::DXT1_SRGB>,
+        MortonCopy<true, PixelFormat::DXT23_SRGB>,
+        MortonCopy<true, PixelFormat::DXT45_SRGB>,
+        MortonCopy<true, PixelFormat::BC7U_SRGB>,
+        MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
+        MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
+        MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
+        MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
+        MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
+        MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
+        MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
+        MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
        MortonCopy<true, PixelFormat::Z32F>,
        MortonCopy<true, PixelFormat::Z16>,
        MortonCopy<true, PixelFormat::Z24S8>,
@@ -491,7 +511,7 @@ static constexpr GLConversionArray gl_to_morton_fns = {
        MortonCopy<false, PixelFormat::RG16I>,
        MortonCopy<false, PixelFormat::RG16S>,
        MortonCopy<false, PixelFormat::RGB32F>,
-        MortonCopy<false, PixelFormat::SRGBA8>,
+        MortonCopy<false, PixelFormat::RGBA8_SRGB>,
        MortonCopy<false, PixelFormat::RG8U>,
        MortonCopy<false, PixelFormat::RG8S>,
        MortonCopy<false, PixelFormat::RG32UI>,
@@ -499,6 +519,19 @@ static constexpr GLConversionArray gl_to_morton_fns = {
        nullptr,
        nullptr,
        nullptr,
+        MortonCopy<false, PixelFormat::BGRA8_SRGB>,
+        MortonCopy<false, PixelFormat::DXT1_SRGB>,
+        MortonCopy<false, PixelFormat::DXT23_SRGB>,
+        MortonCopy<false, PixelFormat::DXT45_SRGB>,
+        MortonCopy<false, PixelFormat::BC7U_SRGB>,
+        nullptr,
+        nullptr,
+        nullptr,
+        nullptr,
+        nullptr,
+        nullptr,
+        nullptr,
+        nullptr,
        MortonCopy<false, PixelFormat::Z32F>,
        MortonCopy<false, PixelFormat::Z16>,
        MortonCopy<false, PixelFormat::Z24S8>,
@@ -508,34 +541,39 @@ static constexpr GLConversionArray gl_to_morton_fns = {
 };

 void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params,
-                 std::vector<u8>& gl_buffer) {
-    u32 depth = params.depth;
-    if (params.target == SurfaceParams::SurfaceTarget::Texture2D) {
+                 std::vector<u8>& gl_buffer, u32 mip_level) { // mip_level: mipmap level to convert
+    u32 depth = params.MipDepth(mip_level); // depth reported for this mip level
+    if (params.target == SurfaceTarget::Texture2D) {
        // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented.
        depth = 1U;
    }
    if (params.is_layered) {
-        u64 offset = 0;
+        u64 offset = params.GetMipmapLevelOffset(mip_level); // starts at this mip level
        u64 offset_gl = 0;
        u64 layer_size = params.LayerMemorySize();
-        u64 gl_size = params.LayerSizeGL();
-        for (u32 i = 0; i < depth; i++) {
+        u64 gl_size = params.LayerSizeGL(mip_level); // stride between layers inside gl_buffer
+        for (u32 i = 0; i < params.depth; i++) { // runs params.depth times, not the local depth
            functions[static_cast<std::size_t>(params.pixel_format)](
-                params.width, params.block_height, params.height, params.block_depth, 1,
+                params.MipWidth(mip_level), params.MipBlockHeight(mip_level),
+                params.MipHeight(mip_level), params.MipBlockDepth(mip_level), 1,
                gl_buffer.data() + offset_gl, gl_size, params.addr + offset);
            offset += layer_size;
            offset_gl += gl_size;
        }
    } else {
+        u64 offset = params.GetMipmapLevelOffset(mip_level); // added to params.addr below
        functions[static_cast<std::size_t>(params.pixel_format)](
-            params.width, params.block_height, params.height, params.block_depth, depth,
-            gl_buffer.data(), gl_buffer.size(), params.addr);
+            params.MipWidth(mip_level), params.MipBlockHeight(mip_level),
+            params.MipHeight(mip_level), params.MipBlockDepth(mip_level), depth, gl_buffer.data(),
+            gl_buffer.size(), params.addr + offset);
    }
 }

+MICROPROFILE_DEFINE(OpenGL_BlitSurface, "OpenGL", "BlitSurface", MP_RGB(128, 192, 64));
 static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
                        GLuint read_fb_handle, GLuint draw_fb_handle, GLenum src_attachment = 0,
                        GLenum dst_attachment = 0, std::size_t cubemap_face = 0) {
+    MICROPROFILE_SCOPE(OpenGL_BlitSurface);

    const auto& src_params{src_surface->GetSurfaceParams()};
    const auto& dst_params{dst_surface->GetSurfaceParams()};
@@ -546,19 +584,21 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
    OpenGLState state;
    state.draw.read_framebuffer = read_fb_handle;
    state.draw.draw_framebuffer = draw_fb_handle;
-    state.Apply();
+    // Set sRGB enabled if the destination surfaces need it
+    state.framebuffer_srgb.enabled = dst_params.srgb_conversion;
+    state.ApplyFramebufferState();

    u32 buffers{};

    if (src_params.type == SurfaceType::ColorTexture) {
        switch (src_params.target) {
-        case SurfaceParams::SurfaceTarget::Texture2D:
+        case SurfaceTarget::Texture2D:
            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
                                   GL_TEXTURE_2D, src_surface->Texture().handle, 0);
            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
                                   0, 0);
            break;
-        case SurfaceParams::SurfaceTarget::TextureCubemap:
+        case SurfaceTarget::TextureCubemap:
            glFramebufferTexture2D(
                GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
@@ -567,12 +607,12 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
                GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
            break;
-        case SurfaceParams::SurfaceTarget::Texture2DArray:
+        case SurfaceTarget::Texture2DArray:
            glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
                                      src_surface->Texture().handle, 0, 0);
            glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
            break;
-        case SurfaceParams::SurfaceTarget::Texture3D:
+        case SurfaceTarget::Texture3D:
            glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
                                   SurfaceTargetToGL(src_params.target),
                                   src_surface->Texture().handle, 0, 0);
@@ -588,13 +628,13 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
        }

        switch (dst_params.target) {
-        case SurfaceParams::SurfaceTarget::Texture2D:
+        case SurfaceTarget::Texture2D:
            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
                                   GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
                                   0, 0);
            break;
-        case SurfaceParams::SurfaceTarget::TextureCubemap:
+        case SurfaceTarget::TextureCubemap:
            glFramebufferTexture2D(
                GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
@@ -603,13 +643,13 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
                GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
                static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
            break;
-        case SurfaceParams::SurfaceTarget::Texture2DArray:
+        case SurfaceTarget::Texture2DArray:
            glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
                                      dst_surface->Texture().handle, 0, 0);
            glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
            break;

-        case SurfaceParams::SurfaceTarget::Texture3D:
+        case SurfaceTarget::Texture3D:
            glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
                                   SurfaceTargetToGL(dst_params.target),
                                   dst_surface->Texture().handle, 0, 0);
@@ -673,9 +713,11 @@ static void FastCopySurface(const Surface& src_surface, const Surface& dst_surfa
                       0, 0, width, height, 1);
 }

+MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64));
 static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
                        GLuint copy_pbo_handle, GLenum src_attachment = 0,
                        GLenum dst_attachment = 0, std::size_t cubemap_face = 0) {
+    MICROPROFILE_SCOPE(OpenGL_CopySurface);
    ASSERT_MSG(dst_attachment == 0, "Unimplemented");

    const auto& src_params{src_surface->GetSurfaceParams()};
@@ -730,21 +772,22 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
        UNREACHABLE();
    } else {
        switch (dst_params.target) {
-        case SurfaceParams::SurfaceTarget::Texture1D:
+        case SurfaceTarget::Texture1D:
            glTextureSubImage1D(dst_surface->Texture().handle, 0, 0, width, dest_format.format,
                                dest_format.type, nullptr);
            break;
-        case SurfaceParams::SurfaceTarget::Texture2D:
+        case SurfaceTarget::Texture2D:
            glTextureSubImage2D(dst_surface->Texture().handle, 0, 0, 0, width, height,
                                dest_format.format, dest_format.type, nullptr);
            break;
-        case SurfaceParams::SurfaceTarget::Texture3D:
-        case SurfaceParams::SurfaceTarget::Texture2DArray:
+        case SurfaceTarget::Texture3D:
+        case SurfaceTarget::Texture2DArray:
+        case SurfaceTarget::TextureCubeArray:
            glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0, 0, width, height,
                                static_cast<GLsizei>(dst_params.depth), dest_format.format,
                                dest_format.type, nullptr);
            break;
-        case SurfaceParams::SurfaceTarget::TextureCubemap:
+        case SurfaceTarget::TextureCubemap:
            glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0,
                                static_cast<GLint>(cubemap_face), width, height, 1,
                                dest_format.format, dest_format.type, nullptr);
@@ -781,35 +824,43 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
    if (!format_tuple.compressed) {
        // Only pre-create the texture for non-compressed textures.
        switch (params.target) {
-        case SurfaceParams::SurfaceTarget::Texture1D:
-            glTexStorage1D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
-                           rect.GetWidth());
+        case SurfaceTarget::Texture1D:
+            glTexStorage1D(SurfaceTargetToGL(params.target), params.max_mip_level,
+                           format_tuple.internal_format, rect.GetWidth());
            break;
-        case SurfaceParams::SurfaceTarget::Texture2D:
-        case SurfaceParams::SurfaceTarget::TextureCubemap:
-            glTexStorage2D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
-                           rect.GetWidth(), rect.GetHeight());
+        case SurfaceTarget::Texture2D:
+        case SurfaceTarget::TextureCubemap:
+            glTexStorage2D(SurfaceTargetToGL(params.target), params.max_mip_level,
+                           format_tuple.internal_format, rect.GetWidth(), rect.GetHeight());
            break;
-        case SurfaceParams::SurfaceTarget::Texture3D:
-        case SurfaceParams::SurfaceTarget::Texture2DArray:
-            glTexStorage3D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
-                           rect.GetWidth(), rect.GetHeight(), params.depth);
+        case SurfaceTarget::Texture3D:
+        case SurfaceTarget::Texture2DArray:
+        case SurfaceTarget::TextureCubeArray:
+            glTexStorage3D(SurfaceTargetToGL(params.target), params.max_mip_level,
+                           format_tuple.internal_format, rect.GetWidth(), rect.GetHeight(),
+                           params.depth);
            break;
        default:
            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                         static_cast<u32>(params.target));
            UNREACHABLE();
-            glTexStorage2D(GL_TEXTURE_2D, 1, format_tuple.internal_format, rect.GetWidth(),
-                           rect.GetHeight());
+            glTexStorage2D(GL_TEXTURE_2D, params.max_mip_level, format_tuple.internal_format,
+                           rect.GetWidth(), rect.GetHeight());
        }
    }

    glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+    glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MAX_LEVEL,
+                    params.max_mip_level - 1);
+    if (params.max_mip_level == 1) {
+        glTexParameterf(SurfaceTargetToGL(params.target), GL_TEXTURE_LOD_BIAS, 1000.0);
+    }

-    VideoCore::LabelGLObject(GL_TEXTURE, texture.handle, params.addr,
-                             SurfaceParams::SurfaceTargetName(params.target));
+    LabelGLObject(GL_TEXTURE, texture.handle, params.addr,
+                  SurfaceParams::SurfaceTargetName(params.target));

    // Clamp size to mapped GPU memory region
    // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
@@ -839,7 +890,7 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bo

    S8Z24 s8z24_pixel{};
    Z24S8 z24s8_pixel{};
-    constexpr auto bpp{SurfaceParams::GetBytesPerPixel(PixelFormat::S8Z24)};
+    constexpr auto bpp{GetBytesPerPixel(PixelFormat::S8Z24)};
    for (std::size_t y = 0; y < height; ++y) {
        for (std::size_t x = 0; x < width; ++x) {
            const std::size_t offset{bpp * (y * width + x)};
@@ -859,7 +910,7 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bo
 }

 static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
-    constexpr auto bpp{SurfaceParams::GetBytesPerPixel(PixelFormat::G8R8U)};
+    constexpr auto bpp{GetBytesPerPixel(PixelFormat::G8R8U)};
    for (std::size_t y = 0; y < height; ++y) {
        for (std::size_t x = 0; x < width; ++x) {
            const std::size_t offset{bpp * (y * width + x)};
@@ -876,17 +927,26 @@ static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
 * typical desktop GPUs.
 */
 static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
-                                               u32 width, u32 height) {
+                                               u32 width, u32 height, u32 depth) {
    switch (pixel_format) {
    case PixelFormat::ASTC_2D_4X4:
    case PixelFormat::ASTC_2D_8X8:
    case PixelFormat::ASTC_2D_8X5:
-    case PixelFormat::ASTC_2D_5X4: {
+    case PixelFormat::ASTC_2D_5X4:
+    case PixelFormat::ASTC_2D_5X5:
+    case PixelFormat::ASTC_2D_4X4_SRGB:
+    case PixelFormat::ASTC_2D_8X8_SRGB:
+    case PixelFormat::ASTC_2D_8X5_SRGB:
+    case PixelFormat::ASTC_2D_5X4_SRGB:
+    case PixelFormat::ASTC_2D_5X5_SRGB:
+    case PixelFormat::ASTC_2D_10X8:
+    case PixelFormat::ASTC_2D_10X8_SRGB: {
        // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
        u32 block_width{};
        u32 block_height{};
        std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
-        data = Tegra::Texture::ASTC::Decompress(data, width, height, block_width, block_height);
+        data =
+            Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
        break;
    }
    case PixelFormat::S8Z24:
@@ -913,7 +973,13 @@ static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelForm
    case PixelFormat::G8R8U:
    case PixelFormat::G8R8S:
    case PixelFormat::ASTC_2D_4X4:
-    case PixelFormat::ASTC_2D_8X8: {
+    case PixelFormat::ASTC_2D_8X8:
+    case PixelFormat::ASTC_2D_4X4_SRGB:
+    case PixelFormat::ASTC_2D_8X8_SRGB:
+    case PixelFormat::ASTC_2D_5X5:
+    case PixelFormat::ASTC_2D_5X5_SRGB:
+    case PixelFormat::ASTC_2D_10X8:
+    case PixelFormat::ASTC_2D_10X8_SRGB: {
        LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
                     static_cast<u32>(pixel_format));
        UNREACHABLE();
@@ -926,23 +992,25 @@ static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelForm
    }
 }

-MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192));
+MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
 void CachedSurface::LoadGLBuffer() {
    MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
-
-    gl_buffer.resize(params.size_in_bytes_gl);
+    gl_buffer.resize(params.max_mip_level); // one byte buffer per mip level
+    for (u32 i = 0; i < params.max_mip_level; i++)
+        gl_buffer[i].resize(params.GetMipmapSizeGL(i)); // room for mip level i
    if (params.is_tiled) {
        ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
                   params.block_width, static_cast<u32>(params.target));
-
-        SwizzleFunc(morton_to_gl_fns, params, gl_buffer);
+        for (u32 i = 0; i < params.max_mip_level; i++)
+            SwizzleFunc(morton_to_gl_fns, params, gl_buffer[i], i); // one pass per mip level
    } else {
        const auto texture_src_data{Memory::GetPointer(params.addr)};
        const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl};
-        gl_buffer.assign(texture_src_data, texture_src_data_end);
+        gl_buffer[0].assign(texture_src_data, texture_src_data_end); // NOTE(review): only level 0 is filled here - confirm
    }
-
-    ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height);
+    for (u32 i = 0; i < params.max_mip_level; i++) // convert each level with its own dimensions
+        ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i),
+                                           params.MipHeight(i), params.MipDepth(i));
 }

 MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
@@ -952,18 +1020,19 @@ void CachedSurface::FlushGLBuffer() {
    ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented");

    // OpenGL temporary buffer needs to be big enough to store raw texture size
-    gl_buffer.resize(GetSizeInBytes());
+    gl_buffer.resize(1);
+    gl_buffer[0].resize(GetSizeInBytes());

    const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
    // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
-    ASSERT(params.width * SurfaceParams::GetBytesPerPixel(params.pixel_format) % 4 == 0);
+    ASSERT(params.width * GetBytesPerPixel(params.pixel_format) % 4 == 0);
    glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
    ASSERT(!tuple.compressed);
    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
-    glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, gl_buffer.size(),
-                      gl_buffer.data());
+    glGetTextureImage(texture.handle, 0, tuple.format, tuple.type,
+                      static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data());
    glPixelStorei(GL_PACK_ROW_LENGTH, 0);
-    ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer, params.pixel_format, params.width,
+    ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width,
                                        params.height);
    ASSERT(params.type != SurfaceType::Fill);
    const u8* const texture_src_data = Memory::GetPointer(params.addr);
@@ -972,28 +1041,23 @@ void CachedSurface::FlushGLBuffer() {
        ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
                   params.block_width, static_cast<u32>(params.target));

-        SwizzleFunc(gl_to_morton_fns, params, gl_buffer);
+        SwizzleFunc(gl_to_morton_fns, params, gl_buffer[0], 0);
    } else {
-        std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer.data(), GetSizeInBytes());
+        std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes());
    }
 }

-MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192));
-void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
-    if (params.type == SurfaceType::Fill)
-        return;
-
-    MICROPROFILE_SCOPE(OpenGL_TextureUL);
-
-    const auto& rect{params.GetRect()};
+void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
+                                          GLuint draw_fb_handle) { // uploads gl_buffer[mip_map] as level mip_map
+    const auto& rect{params.GetRect(mip_map)};

    // Load data from memory to the surface
    const GLint x0 = static_cast<GLint>(rect.left);
    const GLint y0 = static_cast<GLint>(rect.bottom);
    std::size_t buffer_offset =
-        static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.width +
+        static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.MipWidth(mip_map) +
                                 static_cast<std::size_t>(x0)) *
-        SurfaceParams::GetBytesPerPixel(params.pixel_format);
+        GetBytesPerPixel(params.pixel_format);

    const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
    const GLuint target_tex = texture.handle;
@@ -1009,89 +1073,120 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
    cur_state.Apply();

    // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
-    ASSERT(params.width * SurfaceParams::GetBytesPerPixel(params.pixel_format) % 4 == 0);
-    glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.width));
+    ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0);
+    glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));

+    GLsizei image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
    glActiveTexture(GL_TEXTURE0);
    if (tuple.compressed) {
        switch (params.target) {
-        case SurfaceParams::SurfaceTarget::Texture2D:
-            glCompressedTexImage2D(
-                SurfaceTargetToGL(params.target), 0, tuple.internal_format,
-                static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height), 0,
-                static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[buffer_offset]);
+        case SurfaceTarget::Texture2D:
+            glCompressedTexImage2D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
+                                   static_cast<GLsizei>(params.MipWidth(mip_map)),
+                                   static_cast<GLsizei>(params.MipHeight(mip_map)), 0, image_size,
+                                   &gl_buffer[mip_map][buffer_offset]);
            break;
-        case SurfaceParams::SurfaceTarget::Texture3D:
-        case SurfaceParams::SurfaceTarget::Texture2DArray:
-            glCompressedTexImage3D(
-                SurfaceTargetToGL(params.target), 0, tuple.internal_format,
-                static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height),
-                static_cast<GLsizei>(params.depth), 0,
-                static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[buffer_offset]);
+        case SurfaceTarget::Texture3D:
+            glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
+                                   static_cast<GLsizei>(params.MipWidth(mip_map)),
+                                   static_cast<GLsizei>(params.MipHeight(mip_map)),
+                                   static_cast<GLsizei>(params.MipDepth(mip_map)), 0, image_size,
+                                   &gl_buffer[mip_map][buffer_offset]);
            break;
-        case SurfaceParams::SurfaceTarget::TextureCubemap:
+        case SurfaceTarget::Texture2DArray:
+        case SurfaceTarget::TextureCubeArray:
+            glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
+                                   static_cast<GLsizei>(params.MipWidth(mip_map)),
+                                   static_cast<GLsizei>(params.MipHeight(mip_map)),
+                                   static_cast<GLsizei>(params.depth), 0, image_size,
+                                   &gl_buffer[mip_map][buffer_offset]);
+            break;
+        case SurfaceTarget::TextureCubemap: {
+            GLsizei layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map));
            for (std::size_t face = 0; face < params.depth; ++face) {
                glCompressedTexImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face),
-                                       0, tuple.internal_format, static_cast<GLsizei>(params.width),
-                                       static_cast<GLsizei>(params.height), 0,
-                                       static_cast<GLsizei>(params.SizeInBytesCubeFaceGL()),
-                                       &gl_buffer[buffer_offset]);
-                buffer_offset += params.SizeInBytesCubeFace();
+                                       mip_map, tuple.internal_format,
+                                       static_cast<GLsizei>(params.MipWidth(mip_map)),
+                                       static_cast<GLsizei>(params.MipHeight(mip_map)), 0,
+                                       layer_size, &gl_buffer[mip_map][buffer_offset]);
+                buffer_offset += layer_size;
            }
            break;
+        }
        default:
            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                         static_cast<u32>(params.target));
            UNREACHABLE();
-            glCompressedTexImage2D(
-                GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width),
-                static_cast<GLsizei>(params.height), 0,
-                static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[buffer_offset]);
+            glCompressedTexImage2D(GL_TEXTURE_2D, mip_map, tuple.internal_format,
+                                   static_cast<GLsizei>(params.MipWidth(mip_map)),
+                                   static_cast<GLsizei>(params.MipHeight(mip_map)), 0,
+                                   image_size, // size of this mip level, as in the cases above
+                                   &gl_buffer[mip_map][buffer_offset]);
        }
    } else {

        switch (params.target) {
-        case SurfaceParams::SurfaceTarget::Texture1D:
-            glTexSubImage1D(SurfaceTargetToGL(params.target), 0, x0,
+        case SurfaceTarget::Texture1D:
+            glTexSubImage1D(SurfaceTargetToGL(params.target), mip_map, x0,
                            static_cast<GLsizei>(rect.GetWidth()), tuple.format, tuple.type,
-                            &gl_buffer[buffer_offset]);
+                            &gl_buffer[mip_map][buffer_offset]);
            break;
-        case SurfaceParams::SurfaceTarget::Texture2D:
-            glTexSubImage2D(SurfaceTargetToGL(params.target), 0, x0, y0,
+        case SurfaceTarget::Texture2D:
+            glTexSubImage2D(SurfaceTargetToGL(params.target), mip_map, x0, y0,
                            static_cast<GLsizei>(rect.GetWidth()),
                            static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
-                            &gl_buffer[buffer_offset]);
+                            &gl_buffer[mip_map][buffer_offset]);
            break;
-        case SurfaceParams::SurfaceTarget::Texture3D:
-        case SurfaceParams::SurfaceTarget::Texture2DArray:
-            glTexSubImage3D(SurfaceTargetToGL(params.target), 0, x0, y0, 0,
+        case SurfaceTarget::Texture3D:
+            glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0,
+                            static_cast<GLsizei>(rect.GetWidth()),
+                            static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map),
+                            tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
+            break;
+        case SurfaceTarget::Texture2DArray:
+        case SurfaceTarget::TextureCubeArray:
+            glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0,
                            static_cast<GLsizei>(rect.GetWidth()),
                            static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
-                            tuple.type, &gl_buffer[buffer_offset]);
+                            tuple.type, &gl_buffer[mip_map][buffer_offset]);
            break;
-        case SurfaceParams::SurfaceTarget::TextureCubemap:
+        case SurfaceTarget::TextureCubemap: {
+            const auto layer_size = params.LayerSizeGL(mip_map); // per-face stride, computed once
            for (std::size_t face = 0; face < params.depth; ++face) {
-                glTexSubImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), 0, x0,
-                                y0, static_cast<GLsizei>(rect.GetWidth()),
+                glTexSubImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), mip_map,
+                                x0, y0, static_cast<GLsizei>(rect.GetWidth()),
                                static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
-                                &gl_buffer[buffer_offset]);
-                buffer_offset += params.SizeInBytesCubeFace();
+                                &gl_buffer[mip_map][buffer_offset]);
+                buffer_offset += layer_size;
            }
            break;
+        }
        default:
            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                         static_cast<u32>(params.target));
            UNREACHABLE();
-            glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
+            glTexSubImage2D(GL_TEXTURE_2D, mip_map, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
                            static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
-                            &gl_buffer[buffer_offset]);
+                            &gl_buffer[mip_map][buffer_offset]);
        }
    }

    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
 }

-RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
+MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
+void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
+    // Fill-type surfaces are skipped; all others upload each mip level in ascending order.
+    if (params.type != SurfaceType::Fill) {
+        MICROPROFILE_SCOPE(OpenGL_TextureUL);
+        for (u32 level = 0; level < params.max_mip_level; ++level) {
+            UploadGLMipmapTexture(level, read_fb_handle, draw_fb_handle);
+        }
+    }
+}
+
+RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer)
+    : RasterizerCache{rasterizer} {
    read_framebuffer.Create();
    draw_framebuffer.Create();
    copy_pbo.Create();
@@ -1231,8 +1326,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
    // For compatible surfaces, we can just do fast glCopyImageSubData based copy
    if (old_params.target == new_params.target && old_params.type == new_params.type &&
        old_params.depth == new_params.depth && old_params.depth == 1 &&
-        SurfaceParams::GetFormatBpp(old_params.pixel_format) ==
-            SurfaceParams::GetFormatBpp(new_params.pixel_format)) {
+        GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format)) {
        FastCopySurface(old_surface, new_surface);
        return new_surface;
    }
@@ -1245,15 +1339,17 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
    const bool is_blit{old_params.pixel_format == new_params.pixel_format};

    switch (new_params.target) {
-    case SurfaceParams::SurfaceTarget::Texture2D:
+    case SurfaceTarget::Texture2D:
        if (is_blit) {
            BlitSurface(old_surface, new_surface, read_framebuffer.handle, draw_framebuffer.handle);
        } else {
            CopySurface(old_surface, new_surface, copy_pbo.handle);
        }
        break;
-    case SurfaceParams::SurfaceTarget::TextureCubemap:
-    case SurfaceParams::SurfaceTarget::Texture3D:
+    case SurfaceTarget::TextureCubemap:
+    case SurfaceTarget::Texture3D:
+    case SurfaceTarget::Texture2DArray:
+    case SurfaceTarget::TextureCubeArray:
        AccurateCopySurface(old_surface, new_surface);
        break;
    default:
@@ -1263,7 +1359,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
    }

    return new_surface;
-} // namespace OpenGL
+}

 Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const {
    return TryGet(addr);
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -7,6 +7,7 @@
 #include <array>
 #include <map>
 #include <memory>
+#include <string>
 #include <vector>

 #include "common/alignment.h"
@@ -18,6 +19,7 @@
 #include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_gen.h"
+#include "video_core/surface.h"
 #include "video_core/textures/decoders.h"
 #include "video_core/textures/texture.h"

@@ -27,126 +29,12 @@ class CachedSurface;
 using Surface = std::shared_ptr<CachedSurface>;
 using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;

+using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
+using SurfaceType = VideoCore::Surface::SurfaceType;
+using PixelFormat = VideoCore::Surface::PixelFormat;
+using ComponentType = VideoCore::Surface::ComponentType;
+
 struct SurfaceParams {
-    enum class PixelFormat {
-        ABGR8U = 0,
-        ABGR8S = 1,
-        ABGR8UI = 2,
-        B5G6R5U = 3,
-        A2B10G10R10U = 4,
-        A1B5G5R5U = 5,
-        R8U = 6,
-        R8UI = 7,
-        RGBA16F = 8,
-        RGBA16U = 9,
-        RGBA16UI = 10,
-        R11FG11FB10F = 11,
-        RGBA32UI = 12,
-        DXT1 = 13,
-        DXT23 = 14,
-        DXT45 = 15,
-        DXN1 = 16, // This is also known as BC4
-        DXN2UNORM = 17,
-        DXN2SNORM = 18,
-        BC7U = 19,
-        BC6H_UF16 = 20,
-        BC6H_SF16 = 21,
-        ASTC_2D_4X4 = 22,
-        G8R8U = 23,
-        G8R8S = 24,
-        BGRA8 = 25,
-        RGBA32F = 26,
-        RG32F = 27,
-        R32F = 28,
-        R16F = 29,
-        R16U = 30,
-        R16S = 31,
-        R16UI = 32,
-        R16I = 33,
-        RG16 = 34,
-        RG16F = 35,
-        RG16UI = 36,
-        RG16I = 37,
-        RG16S = 38,
-        RGB32F = 39,
-        SRGBA8 = 40,
-        RG8U = 41,
-        RG8S = 42,
-        RG32UI = 43,
-        R32UI = 44,
-        ASTC_2D_8X8 = 45,
-        ASTC_2D_8X5 = 46,
-        ASTC_2D_5X4 = 47,
-
-        MaxColorFormat,
-
-        // Depth formats
-        Z32F = 48,
-        Z16 = 49,
-
-        MaxDepthFormat,
-
-        // DepthStencil formats
-        Z24S8 = 50,
-        S8Z24 = 51,
-        Z32FS8 = 52,
-
-        MaxDepthStencilFormat,
-
-        Max = MaxDepthStencilFormat,
-        Invalid = 255,
-    };
-
-    static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max);
-
-    enum class ComponentType {
-        Invalid = 0,
-        SNorm = 1,
-        UNorm = 2,
-        SInt = 3,
-        UInt = 4,
-        Float = 5,
-    };
-
-    enum class SurfaceType {
-        ColorTexture = 0,
-        Depth = 1,
-        DepthStencil = 2,
-        Fill = 3,
-        Invalid = 4,
-    };
-
-    enum class SurfaceTarget {
-        Texture1D,
-        Texture2D,
-        Texture3D,
-        Texture1DArray,
-        Texture2DArray,
-        TextureCubemap,
-    };
-
-    static SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type) {
-        switch (texture_type) {
-        case Tegra::Texture::TextureType::Texture1D:
-            return SurfaceTarget::Texture1D;
-        case Tegra::Texture::TextureType::Texture2D:
-        case Tegra::Texture::TextureType::Texture2DNoMipmap:
-            return SurfaceTarget::Texture2D;
-        case Tegra::Texture::TextureType::Texture3D:
-            return SurfaceTarget::Texture3D;
-        case Tegra::Texture::TextureType::TextureCubemap:
-            return SurfaceTarget::TextureCubemap;
-        case Tegra::Texture::TextureType::Texture1DArray:
-            return SurfaceTarget::Texture1DArray;
-        case Tegra::Texture::TextureType::Texture2DArray:
-            return SurfaceTarget::Texture2DArray;
-        default:
-            LOG_CRITICAL(HW_GPU, "Unimplemented texture_type={}", static_cast<u32>(texture_type));
-            UNREACHABLE();
-            return SurfaceTarget::Texture2D;
-        }
-    }
-
    static std::string SurfaceTargetName(SurfaceTarget target) {
        switch (target) {
        case SurfaceTarget::Texture1D:
@@ -161,6 +49,8 @@ struct SurfaceParams {
            return "Texture2DArray";
        case SurfaceTarget::TextureCubemap:
            return "TextureCubemap";
+        case SurfaceTarget::TextureCubeArray:
+            return "TextureCubeArray";
        default:
            LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
            UNREACHABLE();
@@ -168,569 +58,12 @@ struct SurfaceParams {
        }
    }

-    static bool SurfaceTargetIsLayered(SurfaceTarget target) {
-        switch (target) {
-        case SurfaceTarget::Texture1D:
-        case SurfaceTarget::Texture2D:
-        case SurfaceTarget::Texture3D:
-            return false;
-        case SurfaceTarget::Texture1DArray:
-        case SurfaceTarget::Texture2DArray:
-        case SurfaceTarget::TextureCubemap:
-            return true;
-        default:
-            LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
-            UNREACHABLE();
-            return false;
-        }
-    }
-
-    /**
-     * Gets the compression factor for the specified PixelFormat. This applies to just the
-     * "compressed width" and "compressed height", not the overall compression factor of a
-     * compressed image. This is used for maintaining proper surface sizes for compressed
-     * texture formats.
-     */
-    static constexpr u32 GetCompressionFactor(PixelFormat format) {
-        if (format == PixelFormat::Invalid)
-            return 0;
-
-        constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{
-            1, // ABGR8U
-            1, // ABGR8S
-            1, // ABGR8UI
-            1, // B5G6R5U
-            1, // A2B10G10R10U
-            1, // A1B5G5R5U
-            1, // R8U
-            1, // R8UI
-            1, // RGBA16F
-            1, // RGBA16U
-            1, // RGBA16UI
-            1, // R11FG11FB10F
-            1, // RGBA32UI
-            4, // DXT1
-            4, // DXT23
-            4, // DXT45
-            4, // DXN1
-            4, // DXN2UNORM
-            4, // DXN2SNORM
-            4, // BC7U
-            4, // BC6H_UF16
-            4, // BC6H_SF16
-            4, // ASTC_2D_4X4
-            1, // G8R8U
-            1, // G8R8S
-            1, // BGRA8
-            1, // RGBA32F
-            1, // RG32F
-            1, // R32F
-            1, // R16F
-            1, // R16U
-            1, // R16S
-            1, // R16UI
-            1, // R16I
-            1, // RG16
-            1, // RG16F
-            1, // RG16UI
-            1, // RG16I
-            1, // RG16S
-            1, // RGB32F
-            1, // SRGBA8
-            1, // RG8U
-            1, // RG8S
-            1, // RG32UI
-            1, // R32UI
-            4, // ASTC_2D_8X8
-            4, // ASTC_2D_8X5
-            4, // ASTC_2D_5X4
-            1, // Z32F
-            1, // Z16
-            1, // Z24S8
-            1, // S8Z24
-            1, // Z32FS8
-        }};
-
-        ASSERT(static_cast<std::size_t>(format) < compression_factor_table.size());
-        return compression_factor_table[static_cast<std::size_t>(format)];
-    }
-
-    static constexpr u32 GetFormatBpp(PixelFormat format) {
-        if (format == PixelFormat::Invalid)
-            return 0;
-
-        constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
-            32,  // ABGR8U
-            32,  // ABGR8S
-            32,  // ABGR8UI
-            16,  // B5G6R5U
-            32,  // A2B10G10R10U
-            16,  // A1B5G5R5U
-            8,   // R8U
-            8,   // R8UI
-            64,  // RGBA16F
-            64,  // RGBA16U
-            64,  // RGBA16UI
-            32,  // R11FG11FB10F
-            128, // RGBA32UI
-            64,  // DXT1
-            128, // DXT23
-            128, // DXT45
-            64,  // DXN1
-            128, // DXN2UNORM
-            128, // DXN2SNORM
-            128, // BC7U
-            128, // BC6H_UF16
-            128, // BC6H_SF16
-            32,  // ASTC_2D_4X4
-            16,  // G8R8U
-            16,  // G8R8S
-            32,  // BGRA8
-            128, // RGBA32F
-            64,  // RG32F
-            32,  // R32F
-            16,  // R16F
-            16,  // R16U
-            16,  // R16S
-            16,  // R16UI
-            16,  // R16I
-            32,  // RG16
-            32,  // RG16F
-            32,  // RG16UI
-            32,  // RG16I
-            32,  // RG16S
-            96,  // RGB32F
-            32,  // SRGBA8
-            16,  // RG8U
-            16,  // RG8S
-            64,  // RG32UI
-            32,  // R32UI
-            16,  // ASTC_2D_8X8
-            32,  // ASTC_2D_8X5
-            32,  // ASTC_2D_5X4
-            32,  // Z32F
-            16,  // Z16
-            32,  // Z24S8
-            32,  // S8Z24
-            64,  // Z32FS8
-        }};
-
-        ASSERT(static_cast<std::size_t>(format) < bpp_table.size());
-        return bpp_table[static_cast<std::size_t>(format)];
-    }
-
    u32 GetFormatBpp() const {
-        return GetFormatBpp(pixel_format);
-    }
-
-    static PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
-        switch (format) {
-        case Tegra::DepthFormat::S8_Z24_UNORM:
-            return PixelFormat::S8Z24;
-        case Tegra::DepthFormat::Z24_S8_UNORM:
-            return PixelFormat::Z24S8;
-        case Tegra::DepthFormat::Z32_FLOAT:
-            return PixelFormat::Z32F;
-        case Tegra::DepthFormat::Z16_UNORM:
-            return PixelFormat::Z16;
-        case Tegra::DepthFormat::Z32_S8_X24_FLOAT:
-            return PixelFormat::Z32FS8;
-        default:
-            LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
-            UNREACHABLE();
-        }
-    }
-
-    static PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
-        switch (format) {
-        // TODO (Hexagon12): Converting SRGBA to RGBA is a hack and doesn't completely correct the
-        // gamma.
-        case Tegra::RenderTargetFormat::RGBA8_SRGB:
-        case Tegra::RenderTargetFormat::RGBA8_UNORM:
-            return PixelFormat::ABGR8U;
-        case Tegra::RenderTargetFormat::RGBA8_SNORM:
-            return PixelFormat::ABGR8S;
-        case Tegra::RenderTargetFormat::RGBA8_UINT:
-            return PixelFormat::ABGR8UI;
-        case Tegra::RenderTargetFormat::BGRA8_SRGB:
-        case Tegra::RenderTargetFormat::BGRA8_UNORM:
-            return PixelFormat::BGRA8;
-        case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
-            return PixelFormat::A2B10G10R10U;
-        case Tegra::RenderTargetFormat::RGBA16_FLOAT:
-            return PixelFormat::RGBA16F;
-        case Tegra::RenderTargetFormat::RGBA16_UNORM:
-            return PixelFormat::RGBA16U;
-        case Tegra::RenderTargetFormat::RGBA16_UINT:
-            return PixelFormat::RGBA16UI;
-        case Tegra::RenderTargetFormat::RGBA32_FLOAT:
-            return PixelFormat::RGBA32F;
-        case Tegra::RenderTargetFormat::RG32_FLOAT:
-            return PixelFormat::RG32F;
-        case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
-            return PixelFormat::R11FG11FB10F;
-        case Tegra::RenderTargetFormat::B5G6R5_UNORM:
-            return PixelFormat::B5G6R5U;
-        case Tegra::RenderTargetFormat::BGR5A1_UNORM:
-            return PixelFormat::A1B5G5R5U;
-        case Tegra::RenderTargetFormat::RGBA32_UINT:
-            return PixelFormat::RGBA32UI;
-        case Tegra::RenderTargetFormat::R8_UNORM:
-            return PixelFormat::R8U;
-        case Tegra::RenderTargetFormat::R8_UINT:
-            return PixelFormat::R8UI;
-        case Tegra::RenderTargetFormat::RG16_FLOAT:
-            return PixelFormat::RG16F;
-        case Tegra::RenderTargetFormat::RG16_UINT:
-            return PixelFormat::RG16UI;
-        case Tegra::RenderTargetFormat::RG16_SINT:
-            return PixelFormat::RG16I;
-        case Tegra::RenderTargetFormat::RG16_UNORM:
-            return PixelFormat::RG16;
-        case Tegra::RenderTargetFormat::RG16_SNORM:
-            return PixelFormat::RG16S;
-        case Tegra::RenderTargetFormat::RG8_UNORM:
-            return PixelFormat::RG8U;
-        case Tegra::RenderTargetFormat::RG8_SNORM:
-            return PixelFormat::RG8S;
-        case Tegra::RenderTargetFormat::R16_FLOAT:
-            return PixelFormat::R16F;
-        case Tegra::RenderTargetFormat::R16_UNORM:
-            return PixelFormat::R16U;
-        case Tegra::RenderTargetFormat::R16_SNORM:
-            return PixelFormat::R16S;
-        case Tegra::RenderTargetFormat::R16_UINT:
-            return PixelFormat::R16UI;
-        case Tegra::RenderTargetFormat::R16_SINT:
-            return PixelFormat::R16I;
-        case Tegra::RenderTargetFormat::R32_FLOAT:
-            return PixelFormat::R32F;
-        case Tegra::RenderTargetFormat::R32_UINT:
-            return PixelFormat::R32UI;
-        case Tegra::RenderTargetFormat::RG32_UINT:
-            return PixelFormat::RG32UI;
-        default:
-            LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
-            UNREACHABLE();
-        }
-    }
-
-    static PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
-                                                    Tegra::Texture::ComponentType component_type) {
-        // TODO(Subv): Properly implement this
-        switch (format) {
-        case Tegra::Texture::TextureFormat::A8R8G8B8:
-            switch (component_type) {
-            case Tegra::Texture::ComponentType::UNORM:
-                return PixelFormat::ABGR8U;
-            case Tegra::Texture::ComponentType::SNORM:
-                return PixelFormat::ABGR8S;
-            case Tegra::Texture::ComponentType::UINT:
-                return PixelFormat::ABGR8UI;
-            }
-            LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
-                         static_cast<u32>(component_type));
-            UNREACHABLE();
-        case Tegra::Texture::TextureFormat::B5G6R5:
-            switch (component_type) {
-            case Tegra::Texture::ComponentType::UNORM:
-                return PixelFormat::B5G6R5U;
-            }
-            LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
-                         static_cast<u32>(component_type));
-            UNREACHABLE();
-        case Tegra::Texture::TextureFormat::A2B10G10R10:
-            switch (component_type) {
-            case Tegra::Texture::ComponentType::UNORM:
-                return PixelFormat::A2B10G10R10U;
-            }
-            LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
-                         static_cast<u32>(component_type));
-            UNREACHABLE();
-        case Tegra::Texture::TextureFormat::A1B5G5R5:
-            switch (component_type) {
-            case Tegra::Texture::ComponentType::UNORM:
-                return PixelFormat::A1B5G5R5U;
-            }
-            LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
-                         static_cast<u32>(component_type));
-            UNREACHABLE();
-        case Tegra::Texture::TextureFormat::R8:
-            switch (component_type) {
-            case Tegra::Texture::ComponentType::UNORM:
-                return PixelFormat::R8U;
-            case Tegra::Texture::ComponentType::UINT:
-                return PixelFormat::R8UI;
-            }
-            LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
-                         static_cast<u32>(component_type));
-            UNREACHABLE();
-        case Tegra::Texture::TextureFormat::G8R8:
-            switch (component_type) {
-            case Tegra::Texture::ComponentType::UNORM:
-                return PixelFormat::G8R8U;
-            case Tegra::Texture::ComponentType::SNORM:
-                return PixelFormat::G8R8S;
-            }
-            LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
-                         static_cast<u32>(component_type));
-            UNREACHABLE();
-        case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
-            switch (component_type) {
-            case Tegra::Texture::ComponentType::UNORM:
-                return PixelFormat::RGBA16U;
-            case Tegra::Texture::ComponentType::FLOAT:
-                return PixelFormat::RGBA16F;
-            }
-            LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
-                         static_cast<u32>(component_type));
-            UNREACHABLE();
-        case Tegra::Texture::TextureFormat::BF10GF11RF11:
-            switch (component_type) {
-            case Tegra::Texture::ComponentType::FLOAT:
-                return PixelFormat::R11FG11FB10F;
-            }
-            LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
-                         static_cast<u32>(component_type));
-            UNREACHABLE();
-        case Tegra::Texture::TextureFormat::R32_G32_B32_A32:
-            switch (component_type) {
-            case Tegra::Texture::ComponentType::FLOAT:
-                return PixelFormat::RGBA32F;
-            case Tegra::Texture::ComponentType::UINT:
-                return PixelFormat::RGBA32UI;
-            }
-            LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
-                         static_cast<u32>(component_type));
-            UNREACHABLE();
-        case Tegra::Texture::TextureFormat::R32_G32:
-            switch (component_type) {
-            case Tegra::Texture::ComponentType::FLOAT:
-                return PixelFormat::RG32F;
-            case Tegra::Texture::ComponentType::UINT:
-                return PixelFormat::RG32UI;
-            }
-            LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
-                         static_cast<u32>(component_type));
-            UNREACHABLE();
-        case Tegra::Texture::TextureFormat::R32_G32_B32:
-            switch (component_type) {
-            case Tegra::Texture::ComponentType::FLOAT:
-                return PixelFormat::RGB32F;
-            }
-            LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
-                         static_cast<u32>(component_type));
-            UNREACHABLE();
-        case Tegra::Texture::TextureFormat::R16:
-            switch (component_type) {
-            case Tegra::Texture::ComponentType::FLOAT:
-                return PixelFormat::R16F;
-            case Tegra::Texture::ComponentType::UNORM:
-                return PixelFormat::R16U;
-            case Tegra::Texture::ComponentType::SNORM:
-                return PixelFormat::R16S;
-            case Tegra::Texture::ComponentType::UINT:
-                return PixelFormat::R16UI;
-            case Tegra::Texture::ComponentType::SINT:
-                return PixelFormat::R16I;
-            }
-            LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
-                         static_cast<u32>(component_type));
-            UNREACHABLE();
-        case Tegra::Texture::TextureFormat::R32:
-            switch (component_type) {
-            case Tegra::Texture::ComponentType::FLOAT:
-                return PixelFormat::R32F;
-            case Tegra::Texture::ComponentType::UINT:
-                return PixelFormat::R32UI;
-            }
-            LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
-                         static_cast<u32>(component_type));
-            UNREACHABLE();
-        case Tegra::Texture::TextureFormat::ZF32:
-            return PixelFormat::Z32F;
-        case Tegra::Texture::TextureFormat::Z16:
-            return PixelFormat::Z16;
-        case Tegra::Texture::TextureFormat::Z24S8:
-            return PixelFormat::Z24S8;
-        case Tegra::Texture::TextureFormat::DXT1:
-            return PixelFormat::DXT1;
-        case Tegra::Texture::TextureFormat::DXT23:
-            return PixelFormat::DXT23;
-        case Tegra::Texture::TextureFormat::DXT45:
-            return PixelFormat::DXT45;
-        case Tegra::Texture::TextureFormat::DXN1:
-            return PixelFormat::DXN1;
-        case Tegra::Texture::TextureFormat::DXN2:
-            switch (component_type) {
-            case Tegra::Texture::ComponentType::UNORM:
-                return PixelFormat::DXN2UNORM;
-            case Tegra::Texture::ComponentType::SNORM:
-                return PixelFormat::DXN2SNORM;
-            }
-            LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
-                         static_cast<u32>(component_type));
-            UNREACHABLE();
-        case Tegra::Texture::TextureFormat::BC7U:
-            return PixelFormat::BC7U;
-        case Tegra::Texture::TextureFormat::BC6H_UF16:
-            return PixelFormat::BC6H_UF16;
-        case Tegra::Texture::TextureFormat::BC6H_SF16:
-            return PixelFormat::BC6H_SF16;
-        case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
-            return PixelFormat::ASTC_2D_4X4;
-        case Tegra::Texture::TextureFormat::ASTC_2D_5X4:
-            return PixelFormat::ASTC_2D_5X4;
-        case Tegra::Texture::TextureFormat::ASTC_2D_8X8:
-            return PixelFormat::ASTC_2D_8X8;
-        case Tegra::Texture::TextureFormat::ASTC_2D_8X5:
-            return PixelFormat::ASTC_2D_8X5;
-        case Tegra::Texture::TextureFormat::R16_G16:
-            switch (component_type) {
-            case Tegra::Texture::ComponentType::FLOAT:
-                return PixelFormat::RG16F;
-            case Tegra::Texture::ComponentType::UNORM:
-                return PixelFormat::RG16;
-            case Tegra::Texture::ComponentType::SNORM:
-                return PixelFormat::RG16S;
-            case Tegra::Texture::ComponentType::UINT:
-                return PixelFormat::RG16UI;
-            case Tegra::Texture::ComponentType::SINT:
-                return PixelFormat::RG16I;
-            }
-            LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
-                         static_cast<u32>(component_type));
-            UNREACHABLE();
-        default:
-            LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}",
-                         static_cast<u32>(format), static_cast<u32>(component_type));
-            UNREACHABLE();
-        }
-    }
-
-    static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {
-        // TODO(Subv): Implement more component types
-        switch (type) {
-        case Tegra::Texture::ComponentType::UNORM:
-            return ComponentType::UNorm;
-        case Tegra::Texture::ComponentType::FLOAT:
-            return ComponentType::Float;
-        case Tegra::Texture::ComponentType::SNORM:
-            return ComponentType::SNorm;
-        case Tegra::Texture::ComponentType::UINT:
-            return ComponentType::UInt;
-        case Tegra::Texture::ComponentType::SINT:
-            return ComponentType::SInt;
-        default:
-            LOG_CRITICAL(HW_GPU, "Unimplemented component type={}", static_cast<u32>(type));
-            UNREACHABLE();
-        }
-    }
-
-    static ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format) {
-        // TODO(Subv): Implement more render targets
-        switch (format) {
-        case Tegra::RenderTargetFormat::RGBA8_UNORM:
-        case Tegra::RenderTargetFormat::RGBA8_SRGB:
-        case Tegra::RenderTargetFormat::BGRA8_UNORM:
-        case Tegra::RenderTargetFormat::BGRA8_SRGB:
-        case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
-        case Tegra::RenderTargetFormat::R8_UNORM:
-        case Tegra::RenderTargetFormat::RG16_UNORM:
-        case Tegra::RenderTargetFormat::R16_UNORM:
-        case Tegra::RenderTargetFormat::B5G6R5_UNORM:
-        case Tegra::RenderTargetFormat::BGR5A1_UNORM:
-        case Tegra::RenderTargetFormat::RG8_UNORM:
-        case Tegra::RenderTargetFormat::RGBA16_UNORM:
-            return ComponentType::UNorm;
-        case Tegra::RenderTargetFormat::RGBA8_SNORM:
-        case Tegra::RenderTargetFormat::RG16_SNORM:
-        case Tegra::RenderTargetFormat::R16_SNORM:
-        case Tegra::RenderTargetFormat::RG8_SNORM:
-            return ComponentType::SNorm;
-        case Tegra::RenderTargetFormat::RGBA16_FLOAT:
-        case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
-        case Tegra::RenderTargetFormat::RGBA32_FLOAT:
-        case Tegra::RenderTargetFormat::RG32_FLOAT:
-        case Tegra::RenderTargetFormat::RG16_FLOAT:
-        case Tegra::RenderTargetFormat::R16_FLOAT:
-        case Tegra::RenderTargetFormat::R32_FLOAT:
-            return ComponentType::Float;
-        case Tegra::RenderTargetFormat::RGBA32_UINT:
-        case Tegra::RenderTargetFormat::RGBA16_UINT:
-        case Tegra::RenderTargetFormat::RG16_UINT:
-        case Tegra::RenderTargetFormat::R8_UINT:
-        case Tegra::RenderTargetFormat::R16_UINT:
-        case Tegra::RenderTargetFormat::RG32_UINT:
-        case Tegra::RenderTargetFormat::R32_UINT:
-        case Tegra::RenderTargetFormat::RGBA8_UINT:
-            return ComponentType::UInt;
-        case Tegra::RenderTargetFormat::RG16_SINT:
-        case Tegra::RenderTargetFormat::R16_SINT:
-            return ComponentType::SInt;
-        default:
-            LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
-            UNREACHABLE();
-        }
-    }
-
-    static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
-        switch (format) {
-        case Tegra::FramebufferConfig::PixelFormat::ABGR8:
-            return PixelFormat::ABGR8U;
-        default:
-            LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
-            UNREACHABLE();
-        }
-    }
-
-    static ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) {
-        switch (format) {
-        case Tegra::DepthFormat::Z16_UNORM:
-        case Tegra::DepthFormat::S8_Z24_UNORM:
-        case Tegra::DepthFormat::Z24_S8_UNORM:
-            return ComponentType::UNorm;
-        case Tegra::DepthFormat::Z32_FLOAT:
-        case Tegra::DepthFormat::Z32_S8_X24_FLOAT:
-            return ComponentType::Float;
-        default:
-            LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
-            UNREACHABLE();
-        }
-    }
-
-    static SurfaceType GetFormatType(PixelFormat pixel_format) {
-        if (static_cast<std::size_t>(pixel_format) <
-            static_cast<std::size_t>(PixelFormat::MaxColorFormat)) {
-            return SurfaceType::ColorTexture;
-        }
-
-        if (static_cast<std::size_t>(pixel_format) <
-            static_cast<std::size_t>(PixelFormat::MaxDepthFormat)) {
-            return SurfaceType::Depth;
-        }
-
-        if (static_cast<std::size_t>(pixel_format) <
-            static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) {
-            return SurfaceType::DepthStencil;
-        }
-
-        // TODO(Subv): Implement the other formats
-        ASSERT(false);
-
-        return SurfaceType::Invalid;
-    }
-
-    /// Returns the sizer in bytes of the specified pixel format
-    static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) {
-        if (pixel_format == SurfaceParams::PixelFormat::Invalid) {
-            return 0;
-        }
-        return GetFormatBpp(pixel_format) / CHAR_BIT;
+        return VideoCore::Surface::GetFormatBpp(pixel_format);
    }

    /// Returns the rectangle corresponding to this surface
-    MathUtil::Rectangle<u32> GetRect() const;
+    MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const;

    /// Returns the total size of this surface in bytes, adjusted for compression
    std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
@@ -761,7 +94,7 @@ struct SurfaceParams {

    /// Returns the exact size of memory occupied by the texture in VRAM, including mipmaps.
    std::size_t MemorySize() const {
-        std::size_t size = InnerMemorySize(is_layered);
+        std::size_t size = InnerMemorySize(false, is_layered);
        if (is_layered)
            return size * depth;
        return size;
@@ -770,12 +103,78 @@ struct SurfaceParams {
    /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including
    /// mipmaps.
    std::size_t LayerMemorySize() const {
-        return InnerMemorySize(true);
+        return InnerMemorySize(false, true);
    }

    /// Returns the size of a layer of this surface in OpenGL.
-    std::size_t LayerSizeGL() const {
-        return SizeInBytesRaw(true) / depth;
+    std::size_t LayerSizeGL(u32 mip_level) const {
+        return InnerMipmapMemorySize(mip_level, true, is_layered, false);
+    }
+
+    std::size_t GetMipmapSizeGL(u32 mip_level, bool ignore_compressed = true) const {
+        std::size_t size = InnerMipmapMemorySize(mip_level, true, is_layered, ignore_compressed);
+        if (is_layered)
+            return size * depth;
+        return size;
+    }
+
+    std::size_t GetMipmapLevelOffset(u32 mip_level) const {
+        std::size_t offset = 0;
+        for (u32 i = 0; i < mip_level; i++)
+            offset += InnerMipmapMemorySize(i, false, is_layered);
+        return offset;
+    }
+
+    std::size_t GetMipmapLevelOffsetGL(u32 mip_level) const {
+        std::size_t offset = 0;
+        for (u32 i = 0; i < mip_level; i++)
+            offset += InnerMipmapMemorySize(i, true, is_layered);
+        return offset;
+    }
+
+    u32 MipWidth(u32 mip_level) const {
+        return std::max(1U, width >> mip_level);
+    }
+
+    u32 MipHeight(u32 mip_level) const {
+        return std::max(1U, height >> mip_level);
+    }
+
+    u32 MipDepth(u32 mip_level) const {
+        return is_layered ? depth : std::max(1U, depth >> mip_level);
+    }
+
+    // Auto block resizing algorithm from:
+    // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
+    u32 MipBlockHeight(u32 mip_level) const {
+        if (mip_level == 0)
+            return block_height;
+        u32 alt_height = MipHeight(mip_level);
+        u32 h = GetDefaultBlockHeight(pixel_format);
+        u32 blocks_in_y = (alt_height + h - 1) / h;
+        u32 bh = 16;
+        while (bh > 1 && blocks_in_y <= bh * 4) {
+            bh >>= 1;
+        }
+        return bh;
+    }
+
+    u32 MipBlockDepth(u32 mip_level) const {
+        if (mip_level == 0)
+            return block_depth;
+        if (is_layered)
+            return 1;
+        u32 depth = MipDepth(mip_level);
+        u32 bd = 32;
+        while (bd > 1 && depth * 2 <= bd) {
+            bd >>= 1;
+        }
+        if (bd == 32) {
+            u32 bh = MipBlockHeight(mip_level);
+            if (bh >= 4)
+                return 16;
+        }
+        return bd;
    }

    /// Creates SurfaceParams from a texture configuration
@@ -819,7 +218,7 @@ struct SurfaceParams {
    SurfaceTarget target;
    u32 max_mip_level;
    bool is_layered;
-
+    bool srgb_conversion;
    // Parameters used for caching
    VAddr addr;
    Tegra::GPUVAddr gpu_addr;
@@ -836,7 +235,10 @@ struct SurfaceParams {
    } rt;

 private:
-    std::size_t InnerMemorySize(bool layer_only = false) const;
+    std::size_t InnerMipmapMemorySize(u32 mip_level, bool force_gl = false, bool layer_only = false,
+                                      bool uncompressed = false) const;
+    std::size_t InnerMemorySize(bool force_gl = false, bool layer_only = false,
+                                bool uncompressed = false) const;
 };

 }; // namespace OpenGL
@@ -862,6 +264,8 @@ struct hash<SurfaceReserveKey> {

 namespace OpenGL {

+class RasterizerOpenGL;
+
 class CachedSurface final : public RasterizerCacheObject {
 public:
    CachedSurface(const SurfaceParams& params);
@@ -898,8 +302,10 @@ public:
    void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);

 private:
+    void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
+
    OGLTexture texture;
-    std::vector<u8> gl_buffer;
+    std::vector<std::vector<u8>> gl_buffer;
    SurfaceParams params;
    GLenum gl_target;
    std::size_t cached_size_in_bytes;
@@ -907,7 +313,7 @@ private:

 class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
 public:
-    RasterizerCacheOpenGL();
+    explicit RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer);

    /// Get a surface based on the texture configuration
    Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -0,0 +1,186 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <utility>
+#include <glad/glad.h>
+#include "common/common_types.h"
+#include "common/microprofile.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
+#include "video_core/renderer_opengl/gl_state.h"
+
+MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192));
+
+namespace OpenGL {
+
+/// Generates a texture name with glGenTextures; does nothing if a handle is already held.
+void OGLTexture::Create() {
+    if (handle == 0) {
+        MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+        glGenTextures(1, &handle);
+    }
+}
+
+/// Deletes the texture, applies UnbindTexture to the current state and zeroes the handle.
+void OGLTexture::Release() {
+    if (handle != 0) {
+        MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+        glDeleteTextures(1, &handle);
+        OpenGLState::GetCurState().UnbindTexture(handle).Apply();
+        handle = 0;
+    }
+}
+
+/// Generates a sampler name with glGenSamplers; does nothing if a handle is already held.
+void OGLSampler::Create() {
+    if (handle == 0) {
+        MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+        glGenSamplers(1, &handle);
+    }
+}
+
+/// Deletes the sampler, applies ResetSampler to the current state and zeroes the handle.
+void OGLSampler::Release() {
+    if (handle != 0) {
+        MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+        glDeleteSamplers(1, &handle);
+        OpenGLState::GetCurState().ResetSampler(handle).Apply();
+        handle = 0;
+    }
+}
+
+/// Stores the handle returned by GLShader::LoadShader for `source` and `type`.
+/// Does nothing when a handle is already held or when `source` is null.
+void OGLShader::Create(const char* source, GLenum type) {
+    if (handle != 0 || source == nullptr)
+        return;
+
+    MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+    handle = GLShader::LoadShader(source, type);
+}
+
+/// Deletes the shader with glDeleteShader and zeroes the handle; does nothing if none is held.
+void OGLShader::Release() {
+    if (handle != 0) {
+        MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+        glDeleteShader(handle);
+        handle = 0;
+    }
+}
+
+/// Creates a stage shader for each non-null source and forwards their handles to Create().
+void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader,
+                                  const char* frag_shader, bool separable_program) {
+    OGLShader vert, geo, frag;
+    if (vert_shader != nullptr)
+        vert.Create(vert_shader, GL_VERTEX_SHADER);
+    if (geo_shader != nullptr)
+        geo.Create(geo_shader, GL_GEOMETRY_SHADER);
+    if (frag_shader != nullptr)
+        frag.Create(frag_shader, GL_FRAGMENT_SHADER);
+    MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+    Create(separable_program, vert.handle, geo.handle, frag.handle);
+}
+
+/// Deletes the program, applies ResetProgram to the current state and zeroes the handle.
+void OGLProgram::Release() {
+    if (handle != 0) {
+        MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+        glDeleteProgram(handle);
+        OpenGLState::GetCurState().ResetProgram(handle).Apply();
+        handle = 0;
+    }
+}
+
+/// Generates a pipeline name with glGenProgramPipelines; does nothing if one is already held.
+void OGLPipeline::Create() {
+    if (handle == 0) {
+        MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+        glGenProgramPipelines(1, &handle);
+    }
+}
+
+/// Deletes the pipeline, applies ResetPipeline to the current state and zeroes the handle.
+void OGLPipeline::Release() {
+    if (handle != 0) {
+        MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+        glDeleteProgramPipelines(1, &handle);
+        OpenGLState::GetCurState().ResetPipeline(handle).Apply();
+        handle = 0;
+    }
+}
+
+/// Generates a buffer name with glGenBuffers; does nothing if a handle is already held.
+void OGLBuffer::Create() {
+    if (handle == 0) {
+        MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+        glGenBuffers(1, &handle);
+    }
+}
+
+/// Deletes the buffer, applies ResetBuffer to the current state and zeroes the handle.
+void OGLBuffer::Release() {
+    if (handle != 0) {
+        MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+        glDeleteBuffers(1, &handle);
+        OpenGLState::GetCurState().ResetBuffer(handle).Apply();
+        handle = 0;
+    }
+}
+
+/// Stores a new fence from glFenceSync; does nothing if a handle is already held.
+void OGLSync::Create() {
+    if (handle == 0) {
+        // Deliberately unprofiled: this one is expected to happen ingame.
+        handle = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+    }
+}
+
+/// Deletes the fence with glDeleteSync and zeroes the handle; does nothing if none is held.
+void OGLSync::Release() {
+    if (handle != 0) {
+        // Deliberately unprofiled: this one is expected to happen ingame.
+        glDeleteSync(handle);
+        handle = 0;
+    }
+}
+
+/// Generates a vertex array name with glGenVertexArrays; does nothing if one is already held.
+void OGLVertexArray::Create() {
+    if (handle == 0) {
+        MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+        glGenVertexArrays(1, &handle);
+    }
+}
+
+/// Deletes the vertex array, applies ResetVertexArray to the current state, zeroes the handle.
+void OGLVertexArray::Release() {
+    if (handle != 0) {
+        MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+        glDeleteVertexArrays(1, &handle);
+        OpenGLState::GetCurState().ResetVertexArray(handle).Apply();
+        handle = 0;
+    }
+}
+
+/// Generates a framebuffer name with glGenFramebuffers; does nothing if one is already held.
+void OGLFramebuffer::Create() {
+    if (handle == 0) {
+        MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+        glGenFramebuffers(1, &handle);
+    }
+}
+
+/// Deletes the framebuffer, applies ResetFramebuffer to the current state, zeroes the handle.
+void OGLFramebuffer::Release() {
+    if (handle != 0) {
+        MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+        glDeleteFramebuffers(1, &handle);
+        OpenGLState::GetCurState().ResetFramebuffer(handle).Apply();
+        handle = 0;
+    }
+}
+
+} // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -8,7 +8,6 @@
 #include <glad/glad.h>
 #include "common/common_types.h"
 #include "video_core/renderer_opengl/gl_shader_util.h"
-#include "video_core/renderer_opengl/gl_state.h"

 namespace OpenGL {

@@ -29,20 +28,10 @@ public:
    }

    /// Creates a new internal OpenGL resource and stores the handle
-    void Create() {
-        if (handle != 0)
-            return;
-        glGenTextures(1, &handle);
-    }
+    void Create();

    /// Deletes the internal OpenGL resource
-    void Release() {
-        if (handle == 0)
-            return;
-        glDeleteTextures(1, &handle);
-        OpenGLState::GetCurState().UnbindTexture(handle).Apply();
-        handle = 0;
-    }
+    void Release();

    GLuint handle = 0;
 };
@@ -64,20 +53,10 @@ public:
    }

    /// Creates a new internal OpenGL resource and stores the handle
-    void Create() {
-        if (handle != 0)
-            return;
-        glGenSamplers(1, &handle);
-    }
+    void Create();

    /// Deletes the internal OpenGL resource
-    void Release() {
-        if (handle == 0)
-            return;
-        glDeleteSamplers(1, &handle);
-        OpenGLState::GetCurState().ResetSampler(handle).Apply();
-        handle = 0;
-    }
+    void Release();

    GLuint handle = 0;
 };
@@ -98,20 +77,9 @@ public:
        return *this;
    }

-    void Create(const char* source, GLenum type) {
-        if (handle != 0)
-            return;
-        if (source == nullptr)
-            return;
-        handle = GLShader::LoadShader(source, type);
-    }
+    void Create(const char* source, GLenum type);

-    void Release() {
-        if (handle == 0)
-            return;
-        glDeleteShader(handle);
-        handle = 0;
-    }
+    void Release();

    GLuint handle = 0;
 };
@@ -141,25 +109,10 @@ public:

    /// Creates a new internal OpenGL resource and stores the handle
    void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
-                          bool separable_program = false) {
-        OGLShader vert, geo, frag;
-        if (vert_shader)
-            vert.Create(vert_shader, GL_VERTEX_SHADER);
-        if (geo_shader)
-            geo.Create(geo_shader, GL_GEOMETRY_SHADER);
-        if (frag_shader)
-            frag.Create(frag_shader, GL_FRAGMENT_SHADER);
-        Create(separable_program, vert.handle, geo.handle, frag.handle);
-    }
+                          bool separable_program = false);

    /// Deletes the internal OpenGL resource
-    void Release() {
-        if (handle == 0)
-            return;
-        glDeleteProgram(handle);
-        OpenGLState::GetCurState().ResetProgram(handle).Apply();
-        handle = 0;
-    }
+    void Release();

    GLuint handle = 0;
 };
@@ -178,20 +131,10 @@ public:
    }

    /// Creates a new internal OpenGL resource and stores the handle
-    void Create() {
-        if (handle != 0)
-            return;
-        glGenProgramPipelines(1, &handle);
-    }
+    void Create();

    /// Deletes the internal OpenGL resource
-    void Release() {
-        if (handle == 0)
-            return;
-        glDeleteProgramPipelines(1, &handle);
-        OpenGLState::GetCurState().ResetPipeline(handle).Apply();
-        handle = 0;
-    }
+    void Release();

    GLuint handle = 0;
 };
@@ -213,20 +156,10 @@ public:
    }

    /// Creates a new internal OpenGL resource and stores the handle
-    void Create() {
-        if (handle != 0)
-            return;
-        glGenBuffers(1, &handle);
-    }
+    void Create();

    /// Deletes the internal OpenGL resource
-    void Release() {
-        if (handle == 0)
-            return;
-        glDeleteBuffers(1, &handle);
-        OpenGLState::GetCurState().ResetBuffer(handle).Apply();
-        handle = 0;
-    }
+    void Release();

    GLuint handle = 0;
 };
@@ -247,19 +180,10 @@ public:
    }

    /// Creates a new internal OpenGL resource and stores the handle
-    void Create() {
-        if (handle != 0)
-            return;
-        handle = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
-    }
+    void Create();

    /// Deletes the internal OpenGL resource
-    void Release() {
-        if (handle == 0)
-            return;
-        glDeleteSync(handle);
-        handle = 0;
-    }
+    void Release();

    GLsync handle = 0;
 };
@@ -281,20 +205,10 @@ public:
    }

    /// Creates a new internal OpenGL resource and stores the handle
-    void Create() {
-        if (handle != 0)
-            return;
-        glGenVertexArrays(1, &handle);
-    }
+    void Create();

    /// Deletes the internal OpenGL resource
-    void Release() {
-        if (handle == 0)
-            return;
-        glDeleteVertexArrays(1, &handle);
-        OpenGLState::GetCurState().ResetVertexArray(handle).Apply();
-        handle = 0;
-    }
+    void Release();

    GLuint handle = 0;
 };
@@ -316,20 +230,10 @@ public:
    }

    /// Creates a new internal OpenGL resource and stores the handle
-    void Create() {
-        if (handle != 0)
-            return;
-        glGenFramebuffers(1, &handle);
-    }
+    void Create();

    /// Deletes the internal OpenGL resource
-    void Release() {
-        if (handle == 0)
-            return;
-        glDeleteFramebuffers(1, &handle);
-        OpenGLState::GetCurState().ResetFramebuffer(handle).Apply();
-        handle = 0;
-    }
+    void Release();

    GLuint handle = 0;
 };
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -6,9 +6,10 @@
 #include "core/core.h"
 #include "core/memory.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
-#include "video_core/utils.h"
+#include "video_core/renderer_opengl/utils.h"

 namespace OpenGL {

@@ -89,7 +90,7 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
        shader.Create(program_result.first.c_str(), gl_type);
        program.Create(true, shader.handle);
        SetShaderUniformBlockBindings(program.handle);
-        VideoCore::LabelGLObject(GL_PROGRAM, program.handle, addr);
+        LabelGLObject(GL_PROGRAM, program.handle, addr);
    } else {
        // Store shader's code to lazily build it on draw
        geometry_programs.code = program_result.first;
@@ -120,20 +121,26 @@ GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) {
 }

 GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program,
-                                         const std::string& glsl_topology,
+                                         const std::string& glsl_topology, u32 max_vertices,
                                         const std::string& debug_name) {
    if (target_program.handle != 0) {
        return target_program.handle;
    }
-    const std::string source{geometry_programs.code + "layout (" + glsl_topology + ") in;\n"};
+    std::string source = "#version 430 core\n";
+    source += "layout (" + glsl_topology + ") in;\n";
+    source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
+    source += geometry_programs.code;
+
    OGLShader shader;
    shader.Create(source.c_str(), GL_GEOMETRY_SHADER);
    target_program.Create(true, shader.handle);
    SetShaderUniformBlockBindings(target_program.handle);
-    VideoCore::LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name);
+    LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name);
    return target_program.handle;
 };

+ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer) : RasterizerCache{rasterizer} {}
+
 Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
    const VAddr program_addr{GetShaderAddress(program)};

--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -16,6 +16,8 @@
 namespace OpenGL {

 class CachedShader;
+class RasterizerOpenGL;
+
 using Shader = std::shared_ptr<CachedShader>;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;

@@ -46,22 +48,23 @@ public:
        }
        switch (primitive_mode) {
        case GL_POINTS:
-            return LazyGeometryProgram(geometry_programs.points, "points", "ShaderPoints");
+            return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints");
        case GL_LINES:
        case GL_LINE_STRIP:
-            return LazyGeometryProgram(geometry_programs.lines, "lines", "ShaderLines");
+            return LazyGeometryProgram(geometry_programs.lines, "lines", 2, "ShaderLines");
        case GL_LINES_ADJACENCY:
        case GL_LINE_STRIP_ADJACENCY:
-            return LazyGeometryProgram(geometry_programs.lines_adjacency, "lines_adjacency",
+            return LazyGeometryProgram(geometry_programs.lines_adjacency, "lines_adjacency", 4,
                                       "ShaderLinesAdjacency");
        case GL_TRIANGLES:
        case GL_TRIANGLE_STRIP:
        case GL_TRIANGLE_FAN:
-            return LazyGeometryProgram(geometry_programs.triangles, "triangles", "ShaderTriangles");
+            return LazyGeometryProgram(geometry_programs.triangles, "triangles", 3,
+                                       "ShaderTriangles");
        case GL_TRIANGLES_ADJACENCY:
        case GL_TRIANGLE_STRIP_ADJACENCY:
            return LazyGeometryProgram(geometry_programs.triangles_adjacency, "triangles_adjacency",
-                                       "ShaderLines");
+                                       6, "ShaderTrianglesAdjacency");
        default:
            UNREACHABLE_MSG("Unknown primitive mode.");
        }
@@ -76,7 +79,7 @@ public:
 private:
    /// Generates a geometry shader or returns one that already exists.
    GLuint LazyGeometryProgram(OGLProgram& target_program, const std::string& glsl_topology,
-                               const std::string& debug_name);
+                               u32 max_vertices, const std::string& debug_name);

    VAddr addr;
    Maxwell::ShaderProgram program_type;
@@ -104,6 +107,8 @@ private:

 class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
 public:
+    explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer);
+
    /// Gets the current specified shader stage program
    Shader GetStageProgram(Maxwell::ShaderProgram program);
 };
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -3,12 +3,12 @@
 // Refer to the license.txt file included.

 #include <map>
+#include <optional>
 #include <set>
 #include <string>
 #include <string_view>
 #include <unordered_set>

-#include <boost/optional.hpp>
 #include <fmt/format.h>

 #include "common/assert.h"
@@ -144,7 +144,7 @@ private:
        for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) {
            const Instruction instr = {program_code[offset]};
            if (const auto opcode = OpCode::Decode(instr)) {
-                switch (opcode->GetId()) {
+                switch (opcode->get().GetId()) {
                case OpCode::Id::EXIT: {
                    // The EXIT instruction can be predicated, which means that the shader can
                    // conditionally end on this instruction. We have to consider the case where the
@@ -341,10 +341,10 @@ public:
     */
    void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value,
                            u64 dest_num_components, u64 value_num_components,
-                            bool is_saturated = false, u64 dest_elem = 0) {
+                            bool is_saturated = false, u64 dest_elem = 0, bool precise = false) {

        SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value,
-                    dest_num_components, value_num_components, dest_elem);
+                    dest_num_components, value_num_components, dest_elem, precise);
    }

    /**
@@ -368,11 +368,12 @@ public:
        const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};

        SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')',
-                    dest_num_components, value_num_components, dest_elem);
+                    dest_num_components, value_num_components, dest_elem, false);

        if (sets_cc) {
            const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )";
            SetInternalFlag(InternalFlag::ZeroFlag, zero_condition);
+            LOG_WARNING(HW_GPU, "Control Codes Imcomplete.");
        }
    }

@@ -416,7 +417,7 @@ public:
            }
        }();

-        SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem);
+        SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem, false);
    }

    /**
@@ -430,7 +431,7 @@ public:
     */
    void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute,
                                    const Tegra::Shader::IpaMode& input_mode,
-                                    boost::optional<Register> vertex = {}) {
+                                    std::optional<Register> vertex = {}) {
        const std::string dest = GetRegisterAsFloat(reg);
        const std::string src = GetInputAttribute(attribute, input_mode, vertex) + GetSwizzle(elem);
        shader.AddLine(dest + " = " + src + ';');
@@ -493,10 +494,10 @@ public:
            // instruction for now.
            if (stage == Maxwell3D::Regs::ShaderStage::Geometry) {
                // TODO(Rodrigo): nouveau sets some attributes after setting emitting a geometry
-                // shader. These instructions use a dirty register as buffer index. To avoid some
-                // drivers from complaining for the out of boundary writes, guard them.
-                const std::string buf_index{"min(" + GetRegisterAsInteger(buf_reg) + ", " +
-                                            std::to_string(MAX_GEOMETRY_BUFFERS - 1) + ')'};
+                // shader. These instructions use a dirty register as buffer index; to keep some
+                // drivers from complaining about out-of-bounds writes, guard them.
+                const std::string buf_index{"((" + GetRegisterAsInteger(buf_reg) + ") % " +
+                                            std::to_string(MAX_GEOMETRY_BUFFERS) + ')'};
                shader.AddLine("amem[" + buf_index + "][" +
                               std::to_string(static_cast<u32>(attribute)) + ']' +
                               GetSwizzle(elem) + " = " + src + ';');
@@ -757,7 +758,8 @@ private:
     * @param dest_elem Optional, the destination element to use for the operation.
     */
    void SetRegister(const Register& reg, u64 elem, const std::string& value,
-                     u64 dest_num_components, u64 value_num_components, u64 dest_elem) {
+                     u64 dest_num_components, u64 value_num_components, u64 dest_elem,
+                     bool precise) {
        if (reg == Register::ZeroIndex) {
            LOG_CRITICAL(HW_GPU, "Cannot set Register::ZeroIndex");
            UNREACHABLE();
@@ -774,7 +776,18 @@ private:
            src += GetSwizzle(elem);
        }

-        shader.AddLine(dest + " = " + src + ';');
+        if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) {
+            shader.AddLine('{');
+            ++shader.scope;
+            // This avoids constant-propagation optimizations and keeps the code as the original.
+            // Sadly, using the precise keyword causes "linking" errors on fragment shaders.
+            shader.AddLine("precise float tmp = " + src + ';');
+            shader.AddLine(dest + " = tmp;");
+            --shader.scope;
+            shader.AddLine('}');
+        } else {
+            shader.AddLine(dest + " = " + src + ';');
+        }
    }

    /// Build the GLSL register list.
@@ -795,10 +808,14 @@ private:
    /// Generates code representing an input attribute register.
    std::string GetInputAttribute(Attribute::Index attribute,
                                  const Tegra::Shader::IpaMode& input_mode,
-                                  boost::optional<Register> vertex = {}) {
+                                  std::optional<Register> vertex = {}) {
        auto GeometryPass = [&](const std::string& name) {
            if (stage == Maxwell3D::Regs::ShaderStage::Geometry && vertex) {
-                return "gs_" + name + '[' + GetRegisterAsInteger(vertex.value(), 0, false) + ']';
+                // TODO(Rodrigo): Guard geometry inputs against out-of-bounds reads. Some games set
+                // a 0x80000000 index for those and the shader fails to build. Find out why this
+                // happens and what its intent is.
+                return "gs_" + name + '[' + GetRegisterAsInteger(*vertex, 0, false) +
+                       " % MAX_VERTEX_INPUT]";
            }
            return name;
        };
@@ -1453,7 +1470,7 @@ private:
        }

        shader.AddLine(
-            fmt::format("// {}: {} (0x{:016x})", offset, opcode->GetName(), instr.value));
+            fmt::format("// {}: {} (0x{:016x})", offset, opcode->get().GetName(), instr.value));

        using Tegra::Shader::Pred;
        ASSERT_MSG(instr.pred.full_pred != Pred::NeverExecute,
@@ -1461,7 +1478,7 @@ private:

        // Some instructions (like SSY) don't have a predicate field, they are always
        // unconditionally executed.
-        bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->GetId());
+        bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId());

        if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) {
            shader.AddLine("if (" +
@@ -1471,7 +1488,7 @@ private:
            ++shader.scope;
        }

-        switch (opcode->GetType()) {
+        switch (opcode->get().GetType()) {
        case OpCode::Type::Arithmetic: {
            std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);

@@ -1488,7 +1505,7 @@ private:
                }
            }

-            switch (opcode->GetId()) {
+            switch (opcode->get().GetId()) {
            case OpCode::Id::MOV_C:
            case OpCode::Id::MOV_R: {
                // MOV does not have neither 'abs' nor 'neg' bits.
@@ -1510,8 +1527,13 @@ private:
                ASSERT_MSG(instr.fmul.cc == 0, "FMUL cc is not implemented");

                op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b);
+
                regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1,
-                                        instr.alu.saturate_d);
+                                        instr.alu.saturate_d, 0, true);
+                if (instr.generates_cc) {
+                    LOG_CRITICAL(HW_GPU, "FMUL Generates an unhandled Control Code");
+                    UNREACHABLE();
+                }
                break;
            }
            case OpCode::Id::FADD_C:
@@ -1519,8 +1541,13 @@ private:
            case OpCode::Id::FADD_IMM: {
                op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
                op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
+
                regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1,
-                                        instr.alu.saturate_d);
+                                        instr.alu.saturate_d, 0, true);
+                if (instr.generates_cc) {
+                    LOG_CRITICAL(HW_GPU, "FADD Generates an unhandled Control Code");
+                    UNREACHABLE();
+                }
                break;
            }
            case OpCode::Id::MUFU: {
@@ -1528,31 +1555,31 @@ private:
                switch (instr.sub_op) {
                case SubOp::Cos:
                    regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1,
-                                            instr.alu.saturate_d);
+                                            instr.alu.saturate_d, 0, true);
                    break;
                case SubOp::Sin:
                    regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1,
-                                            instr.alu.saturate_d);
+                                            instr.alu.saturate_d, 0, true);
                    break;
                case SubOp::Ex2:
                    regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1,
-                                            instr.alu.saturate_d);
+                                            instr.alu.saturate_d, 0, true);
                    break;
                case SubOp::Lg2:
                    regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1,
-                                            instr.alu.saturate_d);
+                                            instr.alu.saturate_d, 0, true);
                    break;
                case SubOp::Rcp:
                    regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1,
-                                            instr.alu.saturate_d);
+                                            instr.alu.saturate_d, 0, true);
                    break;
                case SubOp::Rsq:
                    regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1,
-                                            instr.alu.saturate_d);
+                                            instr.alu.saturate_d, 0, true);
                    break;
                case SubOp::Sqrt:
                    regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1,
-                                            instr.alu.saturate_d);
+                                            instr.alu.saturate_d, 0, true);
                    break;
                default:
                    LOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}",
@@ -1573,7 +1600,11 @@ private:
                regs.SetRegisterToFloat(instr.gpr0, 0,
                                        '(' + condition + ") ? min(" + parameters + ") : max(" +
                                            parameters + ')',
-                                        1, 1);
+                                        1, 1, false, 0, true);
+                if (instr.generates_cc) {
+                    LOG_CRITICAL(HW_GPU, "FMNMX Generates an unhandled Control Code");
+                    UNREACHABLE();
+                }
                break;
            }
            case OpCode::Id::RRO_C:
@@ -1586,14 +1617,15 @@ private:
                break;
            }
            default: {
-                LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {}", opcode->GetName());
+                LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {}",
+                             opcode->get().GetName());
                UNREACHABLE();
            }
            }
            break;
        }
        case OpCode::Type::ArithmeticImmediate: {
-            switch (opcode->GetId()) {
+            switch (opcode->get().GetId()) {
            case OpCode::Id::MOV32_IMM: {
                regs.SetRegisterToFloat(instr.gpr0, 0, GetImmediate32(instr), 1, 1);
                break;
@@ -1602,7 +1634,11 @@ private:
                regs.SetRegisterToFloat(instr.gpr0, 0,
                                        regs.GetRegisterAsFloat(instr.gpr8) + " * " +
                                            GetImmediate32(instr),
-                                        1, 1, instr.fmul32.saturate);
+                                        1, 1, instr.fmul32.saturate, 0, true);
+                if (instr.op_32.generates_cc) {
+                    LOG_CRITICAL(HW_GPU, "FMUL32 Generates an unhandled Control Code");
+                    UNREACHABLE();
+                }
                break;
            }
            case OpCode::Id::FADD32I: {
@@ -1625,7 +1661,11 @@ private:
                    op_b = "-(" + op_b + ')';
                }

-                regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1);
+                regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, 0, true);
+                if (instr.op_32.generates_cc) {
+                    LOG_CRITICAL(HW_GPU, "FADD32 Generates an unhandled Control Code");
+                    UNREACHABLE();
+                }
                break;
            }
            }
@@ -1637,7 +1677,7 @@ private:
            std::string op_a = instr.bfe.negate_a ? "-" : "";
            op_a += regs.GetRegisterAsInteger(instr.gpr8);

-            switch (opcode->GetId()) {
+            switch (opcode->get().GetId()) {
            case OpCode::Id::BFE_IMM: {
                std::string inner_shift =
                    '(' + op_a + " << " + std::to_string(instr.bfe.GetLeftShiftValue()) + ')';
@@ -1646,10 +1686,14 @@ private:
                    std::to_string(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position) + ')';

                regs.SetRegisterToInteger(instr.gpr0, true, 0, outer_shift, 1, 1);
+                if (instr.generates_cc) {
+                    LOG_CRITICAL(HW_GPU, "BFE Generates an unhandled Control Code");
+                    UNREACHABLE();
+                }
                break;
            }
            default: {
-                LOG_CRITICAL(HW_GPU, "Unhandled BFE instruction: {}", opcode->GetName());
+                LOG_CRITICAL(HW_GPU, "Unhandled BFE instruction: {}", opcode->get().GetName());
                UNREACHABLE();
            }
            }
@@ -1671,7 +1715,7 @@ private:
                }
            }

-            switch (opcode->GetId()) {
+            switch (opcode->get().GetId()) {
            case OpCode::Id::SHR_C:
            case OpCode::Id::SHR_R:
            case OpCode::Id::SHR_IMM: {
@@ -1683,15 +1727,23 @@ private:
                // Cast to int is superfluous for arithmetic shift, it's only for a logical shift
                regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(" + op_a + " >> " + op_b + ')',
                                          1, 1);
+                if (instr.generates_cc) {
+                    LOG_CRITICAL(HW_GPU, "SHR Generates an unhandled Control Code");
+                    UNREACHABLE();
+                }
                break;
            }
            case OpCode::Id::SHL_C:
            case OpCode::Id::SHL_R:
            case OpCode::Id::SHL_IMM:
                regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1);
+                if (instr.generates_cc) {
+                    LOG_CRITICAL(HW_GPU, "SHL Generates an unhandled Control Code");
+                    UNREACHABLE();
+                }
                break;
            default: {
-                LOG_CRITICAL(HW_GPU, "Unhandled shift instruction: {}", opcode->GetName());
+                LOG_CRITICAL(HW_GPU, "Unhandled shift instruction: {}", opcode->get().GetName());
                UNREACHABLE();
            }
            }
@@ -1701,13 +1753,17 @@ private:
            std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
            std::string op_b = std::to_string(instr.alu.imm20_32.Value());

-            switch (opcode->GetId()) {
+            switch (opcode->get().GetId()) {
            case OpCode::Id::IADD32I:
                if (instr.iadd32i.negate_a)
                    op_a = "-(" + op_a + ')';

                regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
                                          instr.iadd32i.saturate != 0);
+                if (instr.op_32.generates_cc) {
+                    LOG_CRITICAL(HW_GPU, "IADD32 Generates an unhandled Control Code");
+                    UNREACHABLE();
+                }
                break;
            case OpCode::Id::LOP32I: {
                if (instr.alu.lop32i.invert_a)
@@ -1719,11 +1775,15 @@ private:
                WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
                                    Tegra::Shader::PredicateResultMode::None,
                                    Tegra::Shader::Pred::UnusedIndex);
+                if (instr.op_32.generates_cc) {
+                    LOG_CRITICAL(HW_GPU, "LOP32I Generates an unhandled Control Code");
+                    UNREACHABLE();
+                }
                break;
            }
            default: {
                LOG_CRITICAL(HW_GPU, "Unhandled ArithmeticIntegerImmediate instruction: {}",
-                             opcode->GetName());
+                             opcode->get().GetName());
                UNREACHABLE();
            }
            }
@@ -1743,7 +1803,7 @@ private:
                }
            }

-            switch (opcode->GetId()) {
+            switch (opcode->get().GetId()) {
            case OpCode::Id::IADD_C:
            case OpCode::Id::IADD_R:
            case OpCode::Id::IADD_IMM: {
@@ -1755,6 +1815,10 @@ private:

                regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
                                          instr.alu.saturate_d);
+                if (instr.generates_cc) {
+                    LOG_CRITICAL(HW_GPU, "IADD Generates an unhandled Control Code");
+                    UNREACHABLE();
+                }
                break;
            }
            case OpCode::Id::IADD3_C:
@@ -1779,7 +1843,7 @@ private:
                    }
                };

-                if (opcode->GetId() == OpCode::Id::IADD3_R) {
+                if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
                    apply_height(instr.iadd3.height_a, op_a);
                    apply_height(instr.iadd3.height_b, op_b);
                    apply_height(instr.iadd3.height_c, op_c);
@@ -1795,7 +1859,7 @@ private:
                    op_c = "-(" + op_c + ')';

                std::string result;
-                if (opcode->GetId() == OpCode::Id::IADD3_R) {
+                if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
                    switch (instr.iadd3.mode) {
                    case Tegra::Shader::IAdd3Mode::RightShift:
                        // TODO(tech4me): According to
@@ -1816,6 +1880,11 @@ private:
                }

                regs.SetRegisterToInteger(instr.gpr0, true, 0, result, 1, 1);
+
+                if (instr.generates_cc) {
+                    LOG_CRITICAL(HW_GPU, "IADD3 Generates an unhandled Control Code");
+                    UNREACHABLE();
+                }
                break;
            }
            case OpCode::Id::ISCADD_C:
@@ -1831,6 +1900,10 @@ private:

                regs.SetRegisterToInteger(instr.gpr0, true, 0,
                                          "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
+                if (instr.generates_cc) {
+                    LOG_CRITICAL(HW_GPU, "ISCADD Generates an unhandled Control Code");
+                    UNREACHABLE();
+                }
                break;
            }
            case OpCode::Id::POPC_C:
@@ -1862,6 +1935,10 @@ private:

                WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b,
                                    instr.alu.lop.pred_result_mode, instr.alu.lop.pred48);
+                if (instr.generates_cc) {
+                    LOG_CRITICAL(HW_GPU, "LOP Generates an unhandled Control Code");
+                    UNREACHABLE();
+                }
                break;
            }
            case OpCode::Id::LOP3_C:
@@ -1870,13 +1947,17 @@ private:
                const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
                std::string lut;

-                if (opcode->GetId() == OpCode::Id::LOP3_R) {
+                if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
                    lut = '(' + std::to_string(instr.alu.lop3.GetImmLut28()) + ')';
                } else {
                    lut = '(' + std::to_string(instr.alu.lop3.GetImmLut48()) + ')';
                }

                WriteLop3Instruction(instr.gpr0, op_a, op_b, op_c, lut);
+                if (instr.generates_cc) {
+                    LOG_CRITICAL(HW_GPU, "LOP3 Generates an unhandled Control Code");
+                    UNREACHABLE();
+                }
                break;
            }
            case OpCode::Id::IMNMX_C:
@@ -1891,6 +1972,10 @@ private:
                                          '(' + condition + ") ? min(" + parameters + ") : max(" +
                                              parameters + ')',
                                          1, 1);
+                if (instr.generates_cc) {
+                    LOG_CRITICAL(HW_GPU, "IMNMX Generates an unhandled Control Code");
+                    UNREACHABLE();
+                }
                break;
            }
            case OpCode::Id::LEA_R2:
@@ -1900,7 +1985,7 @@ private:
            case OpCode::Id::LEA_HI: {
                std::string op_c;

-                switch (opcode->GetId()) {
+                switch (opcode->get().GetId()) {
                case OpCode::Id::LEA_R2: {
                    op_a = regs.GetRegisterAsInteger(instr.gpr20);
                    op_b = regs.GetRegisterAsInteger(instr.gpr39);
@@ -1945,7 +2030,8 @@ private:
                    op_b = regs.GetRegisterAsInteger(instr.gpr8);
                    op_a = std::to_string(instr.lea.imm.entry_a);
                    op_c = std::to_string(instr.lea.imm.entry_b);
-                    LOG_CRITICAL(HW_GPU, "Unhandled LEA subinstruction: {}", opcode->GetName());
+                    LOG_CRITICAL(HW_GPU, "Unhandled LEA subinstruction: {}",
+                                 opcode->get().GetName());
                    UNREACHABLE();
                }
                }
@@ -1960,7 +2046,7 @@ private:
            }
            default: {
                LOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}",
-                             opcode->GetName());
+                             opcode->get().GetName());
                UNREACHABLE();
            }
            }
@@ -1968,20 +2054,21 @@ private:
            break;
        }
        case OpCode::Type::ArithmeticHalf: {
-            if (opcode->GetId() == OpCode::Id::HADD2_C || opcode->GetId() == OpCode::Id::HADD2_R) {
+            if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
+                opcode->get().GetId() == OpCode::Id::HADD2_R) {
                ASSERT_MSG(instr.alu_half.ftz == 0, "Unimplemented");
            }
            const bool negate_a =
-                opcode->GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
+                opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
            const bool negate_b =
-                opcode->GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;
+                opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;

            const std::string op_a =
                GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.alu_half.type_a,
                             instr.alu_half.abs_a != 0, negate_a);

            std::string op_b;
-            switch (opcode->GetId()) {
+            switch (opcode->get().GetId()) {
            case OpCode::Id::HADD2_C:
            case OpCode::Id::HMUL2_C:
                op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
@@ -1999,7 +2086,7 @@ private:
            op_b = GetHalfFloat(op_b, instr.alu_half.type_b, instr.alu_half.abs_b != 0, negate_b);

            const std::string result = [&]() {
-                switch (opcode->GetId()) {
+                switch (opcode->get().GetId()) {
                case OpCode::Id::HADD2_C:
                case OpCode::Id::HADD2_R:
                    return '(' + op_a + " + " + op_b + ')';
@@ -2007,7 +2094,8 @@ private:
                case OpCode::Id::HMUL2_R:
                    return '(' + op_a + " * " + op_b + ')';
                default:
-                    LOG_CRITICAL(HW_GPU, "Unhandled half float instruction: {}", opcode->GetName());
+                    LOG_CRITICAL(HW_GPU, "Unhandled half float instruction: {}",
+                                 opcode->get().GetName());
                    UNREACHABLE();
                    return std::string("0");
                }
@@ -2018,7 +2106,7 @@ private:
            break;
        }
        case OpCode::Type::ArithmeticHalfImmediate: {
-            if (opcode->GetId() == OpCode::Id::HADD2_IMM) {
+            if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
                ASSERT_MSG(instr.alu_half_imm.ftz == 0, "Unimplemented");
            } else {
                ASSERT_MSG(instr.alu_half_imm.precision == Tegra::Shader::HalfPrecision::None,
@@ -2032,7 +2120,7 @@ private:
            const std::string op_b = UnpackHalfImmediate(instr, true);

            const std::string result = [&]() {
-                switch (opcode->GetId()) {
+                switch (opcode->get().GetId()) {
                case OpCode::Id::HADD2_IMM:
                    return op_a + " + " + op_b;
                case OpCode::Id::HMUL2_IMM:
@@ -2058,7 +2146,7 @@ private:
            ASSERT_MSG(instr.ffma.tab5980_1 == 0, "FFMA tab5980_1({}) not implemented",
                       instr.ffma.tab5980_1.Value());

-            switch (opcode->GetId()) {
+            switch (opcode->get().GetId()) {
            case OpCode::Id::FFMA_CR: {
                op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
                                        GLSLRegister::Type::Float);
@@ -2082,24 +2170,29 @@ private:
                break;
            }
            default: {
-                LOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {}", opcode->GetName());
+                LOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {}", opcode->get().GetName());
                UNREACHABLE();
            }
            }

-            regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1,
-                                    instr.alu.saturate_d);
+            regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')',
+                                    1, 1, instr.alu.saturate_d, 0, true);
+            if (instr.generates_cc) {
+                LOG_CRITICAL(HW_GPU, "FFMA Generates an unhandled Control Code");
+                UNREACHABLE();
+            }
+
            break;
        }
        case OpCode::Type::Hfma2: {
-            if (opcode->GetId() == OpCode::Id::HFMA2_RR) {
+            if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
                ASSERT_MSG(instr.hfma2.rr.precision == Tegra::Shader::HalfPrecision::None,
                           "Unimplemented");
            } else {
                ASSERT_MSG(instr.hfma2.precision == Tegra::Shader::HalfPrecision::None,
                           "Unimplemented");
            }
-            const bool saturate = opcode->GetId() == OpCode::Id::HFMA2_RR
+            const bool saturate = opcode->get().GetId() == OpCode::Id::HFMA2_RR
                                      ? instr.hfma2.rr.saturate != 0
                                      : instr.hfma2.saturate != 0;

@@ -2107,7 +2200,7 @@ private:
                GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hfma2.type_a);
            std::string op_b, op_c;

-            switch (opcode->GetId()) {
+            switch (opcode->get().GetId()) {
            case OpCode::Id::HFMA2_CR:
                op_b = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
                                                    GLSLRegister::Type::UnsignedInteger),
@@ -2145,7 +2238,7 @@ private:
            break;
        }
        case OpCode::Type::Conversion: {
-            switch (opcode->GetId()) {
+            switch (opcode->get().GetId()) {
            case OpCode::Id::I2I_R: {
                ASSERT_MSG(!instr.conversion.selector, "Unimplemented");

@@ -2193,6 +2286,11 @@ private:
                }

                regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
+
+                if (instr.generates_cc) {
+                    LOG_CRITICAL(HW_GPU, "I2F Generates an unhandled Control Code");
+                    UNREACHABLE();
+                }
                break;
            }
            case OpCode::Id::F2F_R: {
@@ -2231,6 +2329,11 @@ private:
                }

                regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d);
+
+                if (instr.generates_cc) {
+                    LOG_CRITICAL(HW_GPU, "F2F Generates an unhandled Control Code");
+                    UNREACHABLE();
+                }
                break;
            }
            case OpCode::Id::F2I_R:
@@ -2280,17 +2383,22 @@ private:

                regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
                                          1, false, 0, instr.conversion.dest_size);
+                if (instr.generates_cc) {
+                    LOG_CRITICAL(HW_GPU, "F2I Generates an unhandled Control Code");
+                    UNREACHABLE();
+                }
                break;
            }
            default: {
-                LOG_CRITICAL(HW_GPU, "Unhandled conversion instruction: {}", opcode->GetName());
+                LOG_CRITICAL(HW_GPU, "Unhandled conversion instruction: {}",
+                             opcode->get().GetName());
                UNREACHABLE();
            }
            }
            break;
        }
        case OpCode::Type::Memory: {
-            switch (opcode->GetId()) {
+            switch (opcode->get().GetId()) {
            case OpCode::Id::LD_A: {
                // Note: Shouldn't this be interp mode flat? As in no interpolation made.
                ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
@@ -2638,12 +2746,12 @@ private:
                }
                case 3: {
                    if (is_array) {
-                        UNIMPLEMENTED_MSG("3-coordinate arrays not fully implemented");
-                        const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
-                        const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
-                        coord = "vec2 coords = vec2(" + x + ", " + y + ");";
-                        texture_type = Tegra::Shader::TextureType::Texture2D;
-                        is_array = false;
+                        const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
+                        const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                        const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2);
+                        const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
+                        coord =
+                            "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index + ");";
                    } else {
                        const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
                        const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
@@ -2673,7 +2781,11 @@ private:
                    break;
                }
                case Tegra::Shader::TextureProcessMode::LZ: {
-                    texture = "textureLod(" + sampler + ", coords, 0.0)";
+                    if (depth_compare && is_array) {
+                        texture = "texture(" + sampler + ", coords)";
+                    } else {
+                        texture = "textureLod(" + sampler + ", coords, 0.0)";
+                    }
                    break;
                }
                case Tegra::Shader::TextureProcessMode::LL: {
@@ -2934,7 +3046,7 @@ private:
                break;
            }
            default: {
-                LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->GetName());
+                LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->get().GetName());
                UNREACHABLE();
            }
            }
@@ -3028,7 +3140,7 @@ private:
                             instr.hsetp2.abs_a, instr.hsetp2.negate_a);

            const std::string op_b = [&]() {
-                switch (opcode->GetId()) {
+                switch (opcode->get().GetId()) {
                case OpCode::Id::HSETP2_R:
                    return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
                                        instr.hsetp2.type_b, instr.hsetp2.abs_a,
@@ -3087,10 +3199,15 @@ private:
                regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1);
            }

+            if (instr.generates_cc) {
+                LOG_CRITICAL(HW_GPU, "PSET Generates an unhandled Control Code");
+                UNREACHABLE();
+            }
+
            break;
        }
        case OpCode::Type::PredicateSetPredicate: {
-            switch (opcode->GetId()) {
+            switch (opcode->get().GetId()) {
            case OpCode::Id::PSETP: {
                const std::string op_a =
                    GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
@@ -3136,7 +3253,8 @@ private:
                break;
            }
            default: {
-                LOG_CRITICAL(HW_GPU, "Unhandled predicate instruction: {}", opcode->GetName());
+                LOG_CRITICAL(HW_GPU, "Unhandled predicate instruction: {}",
+                             opcode->get().GetName());
                UNREACHABLE();
            }
            }
@@ -3224,7 +3342,7 @@ private:
                             instr.hset2.abs_a != 0, instr.hset2.negate_a != 0);

            const std::string op_b = [&]() {
-                switch (opcode->GetId()) {
+                switch (opcode->get().GetId()) {
                case OpCode::Id::HSET2_R:
                    return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
                                        instr.hset2.type_b, instr.hset2.abs_b != 0,
@@ -3273,7 +3391,7 @@ private:
            const bool is_signed{instr.xmad.sign_a == 1};

            bool is_merge{};
-            switch (opcode->GetId()) {
+            switch (opcode->get().GetId()) {
            case OpCode::Id::XMAD_CR: {
                is_merge = instr.xmad.merge_56;
                op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
@@ -3302,7 +3420,7 @@ private:
                break;
            }
            default: {
-                LOG_CRITICAL(HW_GPU, "Unhandled XMAD instruction: {}", opcode->GetName());
+                LOG_CRITICAL(HW_GPU, "Unhandled XMAD instruction: {}", opcode->get().GetName());
                UNREACHABLE();
            }
            }
@@ -3351,15 +3469,25 @@ private:
            }

            regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1);
+            if (instr.generates_cc) {
+                LOG_CRITICAL(HW_GPU, "XMAD Generates an unhandled Control Code");
+                UNREACHABLE();
+            }
            break;
        }
        default: {
-            switch (opcode->GetId()) {
+            switch (opcode->get().GetId()) {
            case OpCode::Id::EXIT: {
                if (stage == Maxwell3D::Regs::ShaderStage::Fragment) {
                    EmitFragmentOutputsWrite();
                }

+                const Tegra::Shader::ControlCode cc = instr.flow_control_code;
+                if (cc != Tegra::Shader::ControlCode::T) {
+                    LOG_CRITICAL(HW_GPU, "EXIT Control Code used: {}", static_cast<u32>(cc));
+                    UNREACHABLE();
+                }
+
                switch (instr.flow.cond) {
                case Tegra::Shader::FlowCondition::Always:
                    shader.AddLine("return true;");
@@ -3389,6 +3517,11 @@ private:

                // Enclose "discard" in a conditional, so that GLSL compilation does not complain
                // about unexecuted instructions that may follow this.
+                const Tegra::Shader::ControlCode cc = instr.flow_control_code;
+                if (cc != Tegra::Shader::ControlCode::T) {
+                    LOG_CRITICAL(HW_GPU, "KIL Control Code used: {}", static_cast<u32>(cc));
+                    UNREACHABLE();
+                }
                shader.AddLine("if (true) {");
                ++shader.scope;
                shader.AddLine("discard;");
@@ -3446,6 +3579,11 @@ private:
            case OpCode::Id::BRA: {
                ASSERT_MSG(instr.bra.constant_buffer == 0,
                           "BRA with constant buffers are not implemented");
+                const Tegra::Shader::ControlCode cc = instr.flow_control_code;
+                if (cc != Tegra::Shader::ControlCode::T) {
+                    LOG_CRITICAL(HW_GPU, "BRA Control Code used: {}", static_cast<u32>(cc));
+                    UNREACHABLE();
+                }
                const u32 target = offset + instr.bra.GetBranchTarget();
                shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
                break;
@@ -3486,13 +3624,21 @@ private:
            }
            case OpCode::Id::SYNC: {
                // The SYNC opcode jumps to the address previously set by the SSY opcode
-                ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always);
+                const Tegra::Shader::ControlCode cc = instr.flow_control_code;
+                if (cc != Tegra::Shader::ControlCode::T) {
+                    LOG_CRITICAL(HW_GPU, "SYNC Control Code used: {}", static_cast<u32>(cc));
+                    UNREACHABLE();
+                }
                EmitPopFromFlowStack();
                break;
            }
            case OpCode::Id::BRK: {
                // The BRK opcode jumps to the address previously set by the PBK opcode
-                ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always);
+                const Tegra::Shader::ControlCode cc = instr.flow_control_code;
+                if (cc != Tegra::Shader::ControlCode::T) {
+                    LOG_CRITICAL(HW_GPU, "BRK Control Code used: {}", static_cast<u32>(cc));
+                    UNREACHABLE();
+                }
                EmitPopFromFlowStack();
                break;
            }
@@ -3522,6 +3668,11 @@ private:
                regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1,
                                          instr.vmad.saturate == 1, 0, Register::Size::Word,
                                          instr.vmad.cc);
+                if (instr.generates_cc) {
+                    LOG_CRITICAL(HW_GPU, "VMAD Generates an unhandled Control Code");
+                    UNREACHABLE();
+                }
+
                break;
            }
            case OpCode::Id::VSETP: {
@@ -3549,7 +3700,7 @@ private:
                break;
            }
            default: {
-                LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName());
+                LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->get().GetName());
                UNREACHABLE();
            }
            }
@@ -3690,9 +3841,9 @@ std::string GetCommonDeclarations() {
                       RasterizerOpenGL::MaxConstbufferSize / sizeof(GLvec4));
 }

-boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
-                                                Maxwell3D::Regs::ShaderStage stage,
-                                                const std::string& suffix) {
+std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
+                                              Maxwell3D::Regs::ShaderStage stage,
+                                              const std::string& suffix) {
    try {
        const auto subroutines =
            ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines();
@@ -3701,7 +3852,7 @@ boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code,
    } catch (const DecompileFail& exception) {
        LOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what());
    }
-    return boost::none;
+    return {};
 }

 } // namespace OpenGL::GLShader::Decompiler
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -6,8 +6,8 @@

 #include <array>
 #include <functional>
+#include <optional>
 #include <string>
-#include <boost/optional.hpp>
 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_opengl/gl_shader_gen.h"
@@ -18,8 +18,8 @@ using Tegra::Engines::Maxwell3D;

 std::string GetCommonDeclarations();

-boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
-                                                Maxwell3D::Regs::ShaderStage stage,
-                                                const std::string& suffix);
+std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
+                                              Maxwell3D::Regs::ShaderStage stage,
+                                              const std::string& suffix);

 } // namespace OpenGL::GLShader::Decompiler
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -37,7 +37,7 @@ layout(std140) uniform vs_config {
    ProgramResult program =
        Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
                                     Maxwell3D::Regs::ShaderStage::Vertex, "vertex")
-            .get_value_or({});
+            .value_or(ProgramResult());

    out += program.first;

@@ -45,7 +45,7 @@ layout(std140) uniform vs_config {
        ProgramResult program_b =
            Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET,
                                         Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b")
-                .get_value_or({});
+                .value_or(ProgramResult());
        out += program_b.first;
    }

@@ -82,15 +82,15 @@ void main() {
 }

 ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
-    std::string out = "#version 430 core\n";
-    out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
+    // Version is intentionally skipped in shader generation, it's added by the lazy compilation.
+    std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
    out += Decompiler::GetCommonDeclarations();
    out += "bool exec_geometry();\n";

    ProgramResult program =
        Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
                                     Maxwell3D::Regs::ShaderStage::Geometry, "geometry")
-            .get_value_or({});
+            .value_or(ProgramResult());
    out += R"(
 out gl_PerVertex {
    vec4 gl_Position;
@@ -124,7 +124,7 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
    ProgramResult program =
        Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
                                     Maxwell3D::Regs::ShaderStage::Fragment, "fragment")
-            .get_value_or({});
+            .value_or(ProgramResult());
    out += R"(
 layout(location = 0) out vec4 FragColor0;
 layout(location = 1) out vec4 FragColor1;
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -7,6 +7,7 @@
 #include <glad/glad.h>

 #include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_state.h"
 #include "video_core/renderer_opengl/maxwell_to_gl.h"

 namespace OpenGL::GLShader {
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -11,9 +11,10 @@
 namespace OpenGL {

 OpenGLState OpenGLState::cur_state;
-
+bool OpenGLState::s_rgb_used;
 OpenGLState::OpenGLState() {
    // These all match default OpenGL values
+    framebuffer_srgb.enabled = false;
    cull.enabled = false;
    cull.mode = GL_BACK;
    cull.front_face = GL_CCW;
@@ -21,14 +22,15 @@ OpenGLState::OpenGLState() {
    depth.test_enabled = false;
    depth.test_func = GL_LESS;
    depth.write_mask = GL_TRUE;
-    depth.depth_range_near = 0.0f;
-    depth.depth_range_far = 1.0f;
-
-    color_mask.red_enabled = GL_TRUE;
-    color_mask.green_enabled = GL_TRUE;
-    color_mask.blue_enabled = GL_TRUE;
-    color_mask.alpha_enabled = GL_TRUE;

+    primitive_restart.enabled = false;
+    primitive_restart.index = 0;
+    for (auto& item : color_mask) {
+        item.red_enabled = GL_TRUE;
+        item.green_enabled = GL_TRUE;
+        item.blue_enabled = GL_TRUE;
+        item.alpha_enabled = GL_TRUE;
+    }
    stencil.test_enabled = false;
    auto reset_stencil = [](auto& config) {
        config.test_func = GL_ALWAYS;
@@ -41,19 +43,33 @@ OpenGLState::OpenGLState() {
    };
    reset_stencil(stencil.front);
    reset_stencil(stencil.back);
-
-    blend.enabled = true;
-    blend.rgb_equation = GL_FUNC_ADD;
-    blend.a_equation = GL_FUNC_ADD;
-    blend.src_rgb_func = GL_ONE;
-    blend.dst_rgb_func = GL_ZERO;
-    blend.src_a_func = GL_ONE;
-    blend.dst_a_func = GL_ZERO;
-    blend.color.red = 0.0f;
-    blend.color.green = 0.0f;
-    blend.color.blue = 0.0f;
-    blend.color.alpha = 0.0f;
-
+    for (auto& item : viewports) {
+        item.x = 0;
+        item.y = 0;
+        item.width = 0;
+        item.height = 0;
+        item.depth_range_near = 0.0f;
+        item.depth_range_far = 1.0f;
+    }
+    scissor.enabled = false;
+    scissor.x = 0;
+    scissor.y = 0;
+    scissor.width = 0;
+    scissor.height = 0;
+    for (auto& item : blend) {
+        item.enabled = true;
+        item.rgb_equation = GL_FUNC_ADD;
+        item.a_equation = GL_FUNC_ADD;
+        item.src_rgb_func = GL_ONE;
+        item.dst_rgb_func = GL_ZERO;
+        item.src_a_func = GL_ONE;
+        item.dst_a_func = GL_ZERO;
+    }
+    independant_blend.enabled = false;
+    blend_color.red = 0.0f;
+    blend_color.green = 0.0f;
+    blend_color.blue = 0.0f;
+    blend_color.alpha = 0.0f;
    logic_op.enabled = false;
    logic_op.operation = GL_COPY;

@@ -69,144 +85,308 @@ OpenGLState::OpenGLState() {
    draw.shader_program = 0;
    draw.program_pipeline = 0;

-    scissor.enabled = false;
-    scissor.x = 0;
-    scissor.y = 0;
-    scissor.width = 0;
-    scissor.height = 0;
-
-    viewport.x = 0;
-    viewport.y = 0;
-    viewport.width = 0;
-    viewport.height = 0;
-
    clip_distance = {};

    point.size = 1;
 }

-void OpenGLState::Apply() const {
+/// Issues unconditional glEnable/glDisable calls for a fixed set of capabilities: blending is
+/// enabled, while sRGB framebuffer, face culling, depth test, primitive restart, stencil test,
+/// color logic op and scissor test are disabled. Neither cur_state nor any OpenGLState instance
+/// is read or written here.
+/// NOTE(review): presumably mirrors the enable flags set by the OpenGLState constructor so the
+/// driver and cur_state start in agreement - confirm against the full constructor.
+void OpenGLState::ApplyDefaultState() {
+    glDisable(GL_FRAMEBUFFER_SRGB);
+    glDisable(GL_CULL_FACE);
+    glDisable(GL_DEPTH_TEST);
+    glDisable(GL_PRIMITIVE_RESTART);
+    glDisable(GL_STENCIL_TEST);
+    glEnable(GL_BLEND);
+    glDisable(GL_COLOR_LOGIC_OP);
+    glDisable(GL_SCISSOR_TEST);
+}
+
+/// Enables or disables GL_FRAMEBUFFER_SRGB, but only when this state's flag differs from the one
+/// recorded in cur_state. Enabling also latches the static s_rgb_used flag.
+void OpenGLState::ApplySRgb() const {
+    // sRGB
+    if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) {
+        if (framebuffer_srgb.enabled) {
+            // Track if sRGB is used
+            // (s_rgb_used is a static member, which is why this const method may write it; it
+            // is never reset here, only set)
+            s_rgb_used = true;
+            glEnable(GL_FRAMEBUFFER_SRGB);
+        } else {
+            glDisable(GL_FRAMEBUFFER_SRGB);
+        }
+    }
+}
+
+/// Applies face-culling state. GL_CULL_FACE is toggled only when the enable flag differs from
+/// cur_state. The cull mode and front-face winding are sent only while culling is enabled.
+void OpenGLState::ApplyCulling() const {
    // Culling
-    if (cull.enabled != cur_state.cull.enabled) {
+    const bool cull_changed = cull.enabled != cur_state.cull.enabled;
+    if (cull_changed) {
        if (cull.enabled) {
            glEnable(GL_CULL_FACE);
        } else {
            glDisable(GL_CULL_FACE);
        }
    }
+    if (cull.enabled) {
+        // cull_changed forces a resend: while culling was disabled, mode/front_face updates were
+        // skipped here even though Apply() still copied them into cur_state.
+        if (cull_changed || cull.mode != cur_state.cull.mode) {
+            glCullFace(cull.mode);
+        }

-    if (cull.mode != cur_state.cull.mode) {
-        glCullFace(cull.mode);
+        if (cull_changed || cull.front_face != cur_state.cull.front_face) {
+            glFrontFace(cull.front_face);
+        }
    }
+}

-    if (cull.front_face != cur_state.cull.front_face) {
-        glFrontFace(cull.front_face);
+/// Applies the per-render-target color write masks. When GLAD_GL_ARB_viewport_array is set, each
+/// of the NumRenderTargets masks is compared with cur_state and changed ones are sent through
+/// glColorMaski; otherwise only entry 0 is considered and sent through glColorMask.
+/// NOTE(review): glColorMaski is gated on the viewport-array extension flag here - confirm that
+/// is the intended capability check for indexed color masks.
+void OpenGLState::ApplyColorMask() const {
+    if (GLAD_GL_ARB_viewport_array) {
+        for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
+            const auto& updated = color_mask[i];
+            const auto& current = cur_state.color_mask[i];
+            if (updated.red_enabled != current.red_enabled ||
+                updated.green_enabled != current.green_enabled ||
+                updated.blue_enabled != current.blue_enabled ||
+                updated.alpha_enabled != current.alpha_enabled) {
+                glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
+                             updated.blue_enabled, updated.alpha_enabled);
+            }
+        }
+    } else {
+        // Fallback: entries 1..N-1 of color_mask are ignored on this path.
+        const auto& updated = color_mask[0];
+        const auto& current = cur_state.color_mask[0];
+        if (updated.red_enabled != current.red_enabled ||
+            updated.green_enabled != current.green_enabled ||
+            updated.blue_enabled != current.blue_enabled ||
+            updated.alpha_enabled != current.alpha_enabled) {
+            glColorMask(updated.red_enabled, updated.green_enabled, updated.blue_enabled,
+                        updated.alpha_enabled);
+        }
    }
+}

+/// Applies depth-test state: toggles GL_DEPTH_TEST when the enable flag changed, sends the depth
+/// function only while the test is enabled (forced when the test was just enabled), and sends
+/// the depth write mask whenever it differs from cur_state, regardless of the enable flag.
+void OpenGLState::ApplyDepth() const {
    // Depth test
-    if (depth.test_enabled != cur_state.depth.test_enabled) {
+    const bool depth_test_changed = depth.test_enabled != cur_state.depth.test_enabled;
+    if (depth_test_changed) {
        if (depth.test_enabled) {
            glEnable(GL_DEPTH_TEST);
        } else {
            glDisable(GL_DEPTH_TEST);
        }
    }
-
-    if (depth.test_func != cur_state.depth.test_func) {
+    // depth_test_changed forces a resend because function changes made while the test was
+    // disabled were skipped here but still recorded in cur_state by Apply().
+    if (depth.test_enabled &&
+        (depth_test_changed || depth.test_func != cur_state.depth.test_func)) {
        glDepthFunc(depth.test_func);
    }
-
    // Depth mask
    if (depth.write_mask != cur_state.depth.write_mask) {
        glDepthMask(depth.write_mask);
    }
+}

-    // Depth range
-    if (depth.depth_range_near != cur_state.depth.depth_range_near ||
-        depth.depth_range_far != cur_state.depth.depth_range_far) {
-        glDepthRange(depth.depth_range_near, depth.depth_range_far);
+/// Applies primitive-restart state: toggles GL_PRIMITIVE_RESTART when the enable flag differs
+/// from cur_state and updates the restart index.
+void OpenGLState::ApplyPrimitiveRestart() const {
+    const bool primitive_restart_changed =
+        primitive_restart.enabled != cur_state.primitive_restart.enabled;
+    if (primitive_restart_changed) {
+        if (primitive_restart.enabled) {
+            glEnable(GL_PRIMITIVE_RESTART);
+        } else {
+            glDisable(GL_PRIMITIVE_RESTART);
+        }
    }
-
-    // Color mask
-    if (color_mask.red_enabled != cur_state.color_mask.red_enabled ||
-        color_mask.green_enabled != cur_state.color_mask.green_enabled ||
-        color_mask.blue_enabled != cur_state.color_mask.blue_enabled ||
-        color_mask.alpha_enabled != cur_state.color_mask.alpha_enabled) {
-        glColorMask(color_mask.red_enabled, color_mask.green_enabled, color_mask.blue_enabled,
-                    color_mask.alpha_enabled);
+    // The index is sent on any enable-flag change (including a change to disabled), and
+    // otherwise only when restart is enabled and the index differs from cur_state.
+    if (primitive_restart_changed ||
+        (primitive_restart.enabled &&
+         primitive_restart.index != cur_state.primitive_restart.index)) {
+        glPrimitiveRestartIndex(primitive_restart.index);
    }
+}

-    // Stencil test
-    if (stencil.test_enabled != cur_state.stencil.test_enabled) {
+/// Applies stencil state: toggles GL_STENCIL_TEST when the enable flag differs from cur_state,
+/// then, only while the test is enabled, updates the function, operations and write mask of the
+/// front and back faces separately.
+void OpenGLState::ApplyStencilTest() const {
+    const bool stencil_test_changed = stencil.test_enabled != cur_state.stencil.test_enabled;
+    if (stencil_test_changed) {
        if (stencil.test_enabled) {
            glEnable(GL_STENCIL_TEST);
        } else {
            glDisable(GL_STENCIL_TEST);
        }
    }
-    auto config_stencil = [](GLenum face, const auto& config, const auto& prev_config) {
-        if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref ||
-            config.test_mask != prev_config.test_mask) {
-            glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
-        }
-        if (config.action_depth_fail != prev_config.action_depth_fail ||
-            config.action_depth_pass != prev_config.action_depth_pass ||
-            config.action_stencil_fail != prev_config.action_stencil_fail) {
-            glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
-                                config.action_depth_pass);
-        }
-        if (config.write_mask != prev_config.write_mask) {
-            glStencilMaskSeparate(face, config.write_mask);
-        }
-    };
-    config_stencil(GL_FRONT, stencil.front, cur_state.stencil.front);
-    config_stencil(GL_BACK, stencil.back, cur_state.stencil.back);
+    if (stencil.test_enabled) {
+        // Per-face helper; stencil_test_changed forces the function and operations to be
+        // re-sent right after the test has been enabled.
+        auto config_stencil = [stencil_test_changed](GLenum face, const auto& config,
+                                                     const auto& prev_config) {
+            if (stencil_test_changed || config.test_func != prev_config.test_func ||
+                config.test_ref != prev_config.test_ref ||
+                config.test_mask != prev_config.test_mask) {
+                glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
+            }
+            if (stencil_test_changed || config.action_depth_fail != prev_config.action_depth_fail ||
+                config.action_depth_pass != prev_config.action_depth_pass ||
+                config.action_stencil_fail != prev_config.action_stencil_fail) {
+                glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
+                                    config.action_depth_pass);
+            }
+            // NOTE(review): unlike the two checks above, the write mask is not forced by
+            // stencil_test_changed, and it is skipped entirely while the test is disabled even
+            // though Apply() still copies it into cur_state - confirm a mask changed while
+            // disabled is not meant to reach GL on re-enable.
+            if (config.write_mask != prev_config.write_mask) {
+                glStencilMaskSeparate(face, config.write_mask);
+            }
+        };
+        config_stencil(GL_FRONT, stencil.front, cur_state.stencil.front);
+        config_stencil(GL_BACK, stencil.back, cur_state.stencil.back);
+    }
+}

-    // Blending
-    if (blend.enabled != cur_state.blend.enabled) {
-        if (blend.enabled) {
-            ASSERT(!logic_op.enabled);
+/// Applies scissor state: toggles GL_SCISSOR_TEST when the enable flag differs from cur_state
+/// and sends the scissor rectangle only while the test is enabled.
+void OpenGLState::ApplyScissor() const {
+    const bool scissor_changed = scissor.enabled != cur_state.scissor.enabled;
+    if (scissor_changed) {
+        if (scissor.enabled) {
+            glEnable(GL_SCISSOR_TEST);
+        } else {
+            glDisable(GL_SCISSOR_TEST);
+        }
+    }
+    // scissor_changed forces the rectangle to be re-sent right after enabling, since rectangle
+    // changes made while the test was disabled never reached GL.
+    if (scissor.enabled &&
+        (scissor_changed || scissor.x != cur_state.scissor.x || scissor.y != cur_state.scissor.y ||
+         scissor.width != cur_state.scissor.width || scissor.height != cur_state.scissor.height)) {
+        glScissor(scissor.x, scissor.y, scissor.width, scissor.height);
+    }
+}
+
+/// Applies viewport rectangles and depth ranges. With GLAD_GL_ARB_viewport_array, every entry of
+/// `viewports` (NumRenderTargets of them) is diffed against cur_state and updated through the
+/// indexed calls; without it only entry 0 is applied, through glViewport/glDepthRange.
+/// The rectangle and the depth range are diffed independently of each other.
+void OpenGLState::ApplyViewport() const {
+    if (GLAD_GL_ARB_viewport_array) {
+        for (GLuint i = 0;
+             i < static_cast<GLuint>(Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); i++) {
+            const auto& current = cur_state.viewports[i];
+            const auto& updated = viewports[i];
+            if (updated.x != current.x || updated.y != current.y ||
+                updated.width != current.width || updated.height != current.height) {
+                glViewportIndexedf(i, updated.x, updated.y, updated.width, updated.height);
+            }
+            if (updated.depth_range_near != current.depth_range_near ||
+                updated.depth_range_far != current.depth_range_far) {
+                glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
+            }
+        }
+    } else {
+        const auto& current = cur_state.viewports[0];
+        const auto& updated = viewports[0];
+        if (updated.x != current.x || updated.y != current.y || updated.width != current.width ||
+            updated.height != current.height) {
+            // The viewport fields are GLfloat in gl_state.h; they are converted implicitly to
+            // the integer parameters of glViewport here.
+            glViewport(updated.x, updated.y, updated.width, updated.height);
+        }
+        if (updated.depth_range_near != current.depth_range_near ||
+            updated.depth_range_far != current.depth_range_far) {
+            glDepthRange(updated.depth_range_near, updated.depth_range_far);
+        }
+    }
+}
+
+/// Applies blend[0] as the blend state for all draw buffers, using the non-indexed GL calls.
+/// GL_BLEND is toggled when the enable flag differs from cur_state.blend[0]; functions and
+/// equations are only sent while blending is enabled, and are forced right after enabling.
+void OpenGLState::ApplyGlobalBlending() const {
+    const Blend& current = cur_state.blend[0];
+    const Blend& updated = blend[0];
+    const bool blend_changed = updated.enabled != current.enabled;
+    if (blend_changed) {
+        if (updated.enabled) {
            glEnable(GL_BLEND);
        } else {
            glDisable(GL_BLEND);
        }
    }
-
-    if (blend.color.red != cur_state.blend.color.red ||
-        blend.color.green != cur_state.blend.color.green ||
-        blend.color.blue != cur_state.blend.color.blue ||
-        blend.color.alpha != cur_state.blend.color.alpha) {
-        glBlendColor(blend.color.red, blend.color.green, blend.color.blue, blend.color.alpha);
+    if (!updated.enabled) {
+        return;
    }
+    // separate_alpha selects between the *Separate calls (distinct RGB and alpha parameters)
+    // and the combined calls, which only look at the RGB fields.
+    // NOTE(review): a change of separate_alpha alone does not trigger a resend - confirm.
+    if (updated.separate_alpha) {
+        if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
+            updated.dst_rgb_func != current.dst_rgb_func ||
+            updated.src_a_func != current.src_a_func || updated.dst_a_func != current.dst_a_func) {
+            glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
+                                updated.dst_a_func);
+        }

-    if (blend.src_rgb_func != cur_state.blend.src_rgb_func ||
-        blend.dst_rgb_func != cur_state.blend.dst_rgb_func ||
-        blend.src_a_func != cur_state.blend.src_a_func ||
-        blend.dst_a_func != cur_state.blend.dst_a_func) {
-        glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, blend.src_a_func,
-                            blend.dst_a_func);
+        if (blend_changed || updated.rgb_equation != current.rgb_equation ||
+            updated.a_equation != current.a_equation) {
+            glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
+        }
+    } else {
+        if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
+            updated.dst_rgb_func != current.dst_rgb_func) {
+            glBlendFunc(updated.src_rgb_func, updated.dst_rgb_func);
+        }
+
+        if (blend_changed || updated.rgb_equation != current.rgb_equation) {
+            glBlendEquation(updated.rgb_equation);
+        }
    }
+}

-    if (blend.rgb_equation != cur_state.blend.rgb_equation ||
-        blend.a_equation != cur_state.blend.a_equation) {
-        glBlendEquationSeparate(blend.rgb_equation, blend.a_equation);
+/// Applies blend[target] to a single draw buffer through the indexed GL calls.
+/// @param target Index into `blend` and the draw-buffer index passed to GL.
+/// @param force  When true, the enable flag, functions and equations are sent even if they
+///               match cur_state.blend[target].
+void OpenGLState::ApplyTargetBlending(int target, bool force) const {
+    const Blend& updated = blend[target];
+    const Blend& current = cur_state.blend[target];
+    // `force` is folded into blend_changed so that every check below treats it as a change.
+    const bool blend_changed = updated.enabled != current.enabled || force;
+    if (blend_changed) {
+        if (updated.enabled) {
+            glEnablei(GL_BLEND, static_cast<GLuint>(target));
+        } else {
+            glDisablei(GL_BLEND, static_cast<GLuint>(target));
+        }
    }
+    if (!updated.enabled) {
+        return;
+    }
+    // separate_alpha selects between the *Separate calls (distinct RGB and alpha parameters)
+    // and the combined calls, which only look at the RGB fields.
+    if (updated.separate_alpha) {
+        if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
+            updated.dst_rgb_func != current.dst_rgb_func ||
+            updated.src_a_func != current.src_a_func || updated.dst_a_func != current.dst_a_func) {
+            glBlendFuncSeparateiARB(static_cast<GLuint>(target), updated.src_rgb_func,
+                                    updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
+        }

-    // Logic Operation
-    if (logic_op.enabled != cur_state.logic_op.enabled) {
+        if (blend_changed || updated.rgb_equation != current.rgb_equation ||
+            updated.a_equation != current.a_equation) {
+            glBlendEquationSeparateiARB(static_cast<GLuint>(target), updated.rgb_equation,
+                                        updated.a_equation);
+        }
+    } else {
+        if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
+            updated.dst_rgb_func != current.dst_rgb_func) {
+            glBlendFunciARB(static_cast<GLuint>(target), updated.src_rgb_func,
+                            updated.dst_rgb_func);
+        }
+
+        if (blend_changed || updated.rgb_equation != current.rgb_equation) {
+            glBlendEquationiARB(static_cast<GLuint>(target), updated.rgb_equation);
+        }
+    }
+}
+
+/// Applies blend state. With independant_blend enabled, each of the NumRenderTargets entries of
+/// `blend` is applied to its own draw buffer; otherwise blend[0] is applied globally. The blend
+/// color is shared and is sent whenever any component differs from cur_state.
+void OpenGLState::ApplyBlending() const {
+    if (independant_blend.enabled) {
+        for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
+            // Force a full resend for every target when independent blending has just been
+            // switched on.
+            ApplyTargetBlending(i,
+                                independant_blend.enabled != cur_state.independant_blend.enabled);
+        }
+    } else {
+        // NOTE(review): there is no equivalent force when switching back from per-target to
+        // global blending; ApplyGlobalBlending() only diffs blend[0] - confirm per-target state
+        // set earlier cannot go stale.
+        ApplyGlobalBlending();
+    }
+    if (blend_color.red != cur_state.blend_color.red ||
+        blend_color.green != cur_state.blend_color.green ||
+        blend_color.blue != cur_state.blend_color.blue ||
+        blend_color.alpha != cur_state.blend_color.alpha) {
+        glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha);
+    }
+}
+
+/// Applies color logic-op state: toggles GL_COLOR_LOGIC_OP when the enable flag differs from
+/// cur_state and sends the operation only while logic ops are enabled (forced right after
+/// enabling).
+void OpenGLState::ApplyLogicOp() const {
+    const bool logic_op_changed = logic_op.enabled != cur_state.logic_op.enabled;
+    if (logic_op_changed) {
        if (logic_op.enabled) {
-            ASSERT(!blend.enabled);
            glEnable(GL_COLOR_LOGIC_OP);
        } else {
            glDisable(GL_COLOR_LOGIC_OP);
        }
    }

-    if (logic_op.operation != cur_state.logic_op.operation) {
+    if (logic_op.enabled &&
+        (logic_op_changed || logic_op.operation != cur_state.logic_op.operation)) {
        glLogicOp(logic_op.operation);
    }
+}

-    // Textures
+void OpenGLState::ApplyTextures() const {
    for (std::size_t i = 0; i < std::size(texture_units); ++i) {
        const auto& texture_unit = texture_units[i];
        const auto& cur_state_texture_unit = cur_state.texture_units[i];
@@ -225,28 +405,29 @@ void OpenGLState::Apply() const {
            glTexParameteriv(texture_unit.target, GL_TEXTURE_SWIZZLE_RGBA, mask.data());
        }
    }
+}

-    // Samplers
-    {
-        bool has_delta{};
-        std::size_t first{}, last{};
-        std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers;
-        for (std::size_t i = 0; i < std::size(samplers); ++i) {
-            samplers[i] = texture_units[i].sampler;
-            if (samplers[i] != cur_state.texture_units[i].sampler) {
-                if (!has_delta) {
-                    first = i;
-                    has_delta = true;
-                }
-                last = i;
+/// Rebinds sampler objects whose handle differs from cur_state. The handles of all
+/// NumTextureSamplers texture units are gathered, the first and last differing units are
+/// located, and that whole contiguous range (including unchanged units in between) is bound
+/// with a single glBindSamplers call. Nothing is bound when no unit differs.
+void OpenGLState::ApplySamplers() const {
+    bool has_delta{};
+    std::size_t first{}, last{};
+    std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers;
+    for (std::size_t i = 0; i < std::size(samplers); ++i) {
+        samplers[i] = texture_units[i].sampler;
+        if (samplers[i] != cur_state.texture_units[i].sampler) {
+            if (!has_delta) {
+                first = i;
+                has_delta = true;
            }
-        }
-        if (has_delta) {
-            glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
-                           samplers.data());
+            last = i;
        }
    }
+    if (has_delta) {
+        // glBindSamplers binds samplers[i] to unit first + i, so the pointer must start at the
+        // entry for unit `first`, not at the beginning of the array.
+        glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
+                       samplers.data() + first);
+    }
+}

+void OpenGLState::ApplyFramebufferState() const {
    // Framebuffer
    if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
        glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
@@ -254,7 +435,9 @@ void OpenGLState::Apply() const {
    if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) {
        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
    }
+}

+void OpenGLState::ApplyVertexBufferState() const {
    // Vertex array
    if (draw.vertex_array != cur_state.draw.vertex_array) {
        glBindVertexArray(draw.vertex_array);
@@ -264,7 +447,11 @@ void OpenGLState::Apply() const {
    if (draw.vertex_buffer != cur_state.draw.vertex_buffer) {
        glBindBuffer(GL_ARRAY_BUFFER, draw.vertex_buffer);
    }
+}

+void OpenGLState::Apply() const {
+    ApplyFramebufferState();
+    ApplyVertexBufferState();
    // Uniform buffer
    if (draw.uniform_buffer != cur_state.draw.uniform_buffer) {
        glBindBuffer(GL_UNIFORM_BUFFER, draw.uniform_buffer);
@@ -279,27 +466,6 @@ void OpenGLState::Apply() const {
    if (draw.program_pipeline != cur_state.draw.program_pipeline) {
        glBindProgramPipeline(draw.program_pipeline);
    }
-
-    // Scissor test
-    if (scissor.enabled != cur_state.scissor.enabled) {
-        if (scissor.enabled) {
-            glEnable(GL_SCISSOR_TEST);
-        } else {
-            glDisable(GL_SCISSOR_TEST);
-        }
-    }
-
-    if (scissor.x != cur_state.scissor.x || scissor.y != cur_state.scissor.y ||
-        scissor.width != cur_state.scissor.width || scissor.height != cur_state.scissor.height) {
-        glScissor(scissor.x, scissor.y, scissor.width, scissor.height);
-    }
-
-    if (viewport.x != cur_state.viewport.x || viewport.y != cur_state.viewport.y ||
-        viewport.width != cur_state.viewport.width ||
-        viewport.height != cur_state.viewport.height) {
-        glViewport(viewport.x, viewport.y, viewport.width, viewport.height);
-    }
-
    // Clip distance
    for (std::size_t i = 0; i < clip_distance.size(); ++i) {
        if (clip_distance[i] != cur_state.clip_distance[i]) {
@@ -310,12 +476,22 @@ void OpenGLState::Apply() const {
            }
        }
    }
-
    // Point
    if (point.size != cur_state.point.size) {
        glPointSize(point.size);
    }
-
+    ApplyColorMask();
+    ApplyViewport();
+    ApplyScissor();
+    ApplyStencilTest();
+    ApplySRgb();
+    ApplyCulling();
+    ApplyDepth();
+    ApplyPrimitiveRestart();
+    ApplyBlending();
+    ApplyLogicOp();
+    ApplyTextures();
+    ApplySamplers();
    cur_state = *this;
 }

--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -35,6 +35,10 @@ constexpr TextureUnit ProcTexDiffLUT{9};

 class OpenGLState {
 public:
+    struct {
+        bool enabled; // GL_FRAMEBUFFER_SRGB
+    } framebuffer_srgb;
+
    struct {
        bool enabled;      // GL_CULL_FACE
        GLenum mode;       // GL_CULL_FACE_MODE
@@ -42,20 +46,24 @@ public:
    } cull;

    struct {
-        bool test_enabled;        // GL_DEPTH_TEST
-        GLenum test_func;         // GL_DEPTH_FUNC
-        GLboolean write_mask;     // GL_DEPTH_WRITEMASK
-        GLfloat depth_range_near; // GL_DEPTH_RANGE
-        GLfloat depth_range_far;  // GL_DEPTH_RANGE
+        bool test_enabled;    // GL_DEPTH_TEST
+        GLenum test_func;     // GL_DEPTH_FUNC
+        GLboolean write_mask; // GL_DEPTH_WRITEMASK
    } depth;

    struct {
+        bool enabled;
+        GLuint index;
+    } primitive_restart; // GL_PRIMITIVE_RESTART
+
+    struct ColorMask {
        GLboolean red_enabled;
        GLboolean green_enabled;
        GLboolean blue_enabled;
        GLboolean alpha_enabled;
-    } color_mask; // GL_COLOR_WRITEMASK
-
+    };
+    std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
+        color_mask; // GL_COLOR_WRITEMASK
    struct {
        bool test_enabled; // GL_STENCIL_TEST
        struct {
@@ -69,22 +77,28 @@ public:
        } front, back;
    } stencil;

-    struct {
+    struct Blend {
        bool enabled;        // GL_BLEND
+        // NOTE(review): not assigned in the visible part of the OpenGLState constructor.
+        bool separate_alpha; // Use the separate RGB/alpha blend func and equation calls
        GLenum rgb_equation; // GL_BLEND_EQUATION_RGB
        GLenum a_equation;   // GL_BLEND_EQUATION_ALPHA
        GLenum src_rgb_func; // GL_BLEND_SRC_RGB
        GLenum dst_rgb_func; // GL_BLEND_DST_RGB
        GLenum src_a_func;   // GL_BLEND_SRC_ALPHA
        GLenum dst_a_func;   // GL_BLEND_DST_ALPHA
+    };
+    std::array<Blend, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> blend;

-        struct {
-            GLclampf red;
-            GLclampf green;
-            GLclampf blue;
-            GLclampf alpha;
-        } color; // GL_BLEND_COLOR
-    } blend;
+    struct {
+        bool enabled;
+    } independant_blend;
+
+    struct {
+        GLclampf red;
+        GLclampf green;
+        GLclampf blue;
+        GLclampf alpha;
+    } blend_color; // GL_BLEND_COLOR

    struct {
        bool enabled; // GL_LOGIC_OP_MODE
@@ -129,6 +143,16 @@ public:
        GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING
    } draw;

+    struct viewport {
+        GLfloat x;
+        GLfloat y;
+        GLfloat width;
+        GLfloat height;
+        GLfloat depth_range_near; // GL_DEPTH_RANGE
+        GLfloat depth_range_far;  // GL_DEPTH_RANGE
+    };
+    std::array<viewport, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> viewports;
+
    struct {
        bool enabled; // GL_SCISSOR_TEST
        GLint x;
@@ -137,13 +161,6 @@ public:
        GLsizei height;
    } scissor;

-    struct {
-        GLint x;
-        GLint y;
-        GLsizei width;
-        GLsizei height;
-    } viewport;
-
    struct {
        float size; // GL_POINT_SIZE
    } point;
@@ -156,10 +173,20 @@ public:
    static OpenGLState GetCurState() {
        return cur_state;
    }
-
+    static bool GetsRGBUsed() {
+        return s_rgb_used;
+    }
+    static void ClearsRGBUsed() {
+        s_rgb_used = false;
+    }
    /// Apply this state as the current OpenGL state
    void Apply() const;
-
+    /// Apply only the state affecting the framebuffer
+    void ApplyFramebufferState() const;
+    /// Apply only the state affecting the vertex buffer
+    void ApplyVertexBufferState() const;
+    /// Set the initial OpenGL state
+    static void ApplyDefaultState();
    /// Resets any references to the given resource
    OpenGLState& UnbindTexture(GLuint handle);
    OpenGLState& ResetSampler(GLuint handle);
@@ -171,6 +198,23 @@ public:

 private:
    static OpenGLState cur_state;
+    // Workaround for sRGB problems caused by
+    // QT not supporting srgb output
+    static bool s_rgb_used;
+    void ApplySRgb() const;
+    void ApplyCulling() const;
+    void ApplyColorMask() const;
+    void ApplyDepth() const;
+    void ApplyPrimitiveRestart() const;
+    void ApplyStencilTest() const;
+    void ApplyViewport() const;
+    void ApplyTargetBlending(int target, bool force) const;
+    void ApplyGlobalBlending() const;
+    void ApplyBlending() const;
+    void ApplyLogicOp() const;
+    void ApplyTextures() const;
+    void ApplySamplers() const;
+    void ApplyScissor() const;
 };

 } // namespace OpenGL
--- a/Show More
+++ b/Show More